ai-rag / cv_module /src /api /main.py
robrtt's picture
Clean rebuild: all features fixed
02f4591
"""
CV Pipeline API — main app.
Startup strategy (HF free CPU):
- API startup: INSTANT
- /health is ready immediately
- Sequential prewarm: OCR → YOLO → Captioner (all instant/fast)
- CLIP stays purely lazy — it is only loaded when a classification request arrives
- Frontend polls /ready → the button becomes active once the models are ready
"""
from __future__ import annotations
import os
import sys
import time
import threading
# Opt out of telemetry through environment variables. setdefault() only writes
# a value when the variable is not already set, so an explicit setting from the
# deployment environment wins.
# NOTE(review): these sit ahead of the third-party/project imports below,
# presumably so that any library reading them at import time sees them —
# confirm before moving these lines.
os.environ.setdefault("ANONYMIZED_TELEMETRY", "False")
os.environ.setdefault("CHROMA_TELEMETRY_ENABLED", "False")
os.environ.setdefault("POSTHOG_DISABLED", "1")
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from loguru import logger
from .routes import router, get_pipeline
from .readiness import get_readiness
from ..config import get_cv_settings
# Resolve the CV settings once at import time; startup() reads host/port from it.
settings = get_cv_settings()

# Logging: drop loguru's default handler, then register a colourised stderr
# sink at INFO level and a rotating file sink.
logger.remove()
logger.add(
    sys.stderr,
    format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan> - {message}",
    colorize=True,
    level="INFO",
)
logger.add("./logs/cv_api.log", retention="7 days", rotation="10 MB")

# The ASGI application object.
app = FastAPI(version="1.4.0", title="CV Pipeline API")

# CORS: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# All routes from the project router are mounted under /api/v1.
app.include_router(router, prefix="/api/v1")
def _sequential_prewarm():
    """
    Load the pipeline components one at a time and record each outcome.

    Components are warmed in a fixed order (ocr, yolo, captioner) by reading
    the matching attribute on the shared pipeline object. Cost notes carried
    over from the original author:
      OCR/Tesseract: instant
      YOLO ONNX: ~0.1s (12MB, baked into the image)
      Captioner (Groq): instant (only initialises an httpx client)
      CLIP: purely lazy, never prewarmed here

    The optional CV_PREWARM_DELAY environment variable (whole seconds,
    default 0) postpones the first load. A value that is not an integer, or
    that is negative, falls back to 0 instead of raising and killing the
    prewarm thread before anything has been loaded.

    A failure while loading one component is recorded through the readiness
    tracker and logged; the remaining components are still attempted.
    """
    readiness = get_readiness()
    pipeline = get_pipeline()

    raw_delay = os.environ.get("CV_PREWARM_DELAY", "0")
    try:
        startup_delay = int(raw_delay)
    except ValueError:
        logger.warning(
            f"CV_PREWARM_DELAY={raw_delay!r} is not an integer; using 0s"
        )
        startup_delay = 0
    # time.sleep() raises ValueError for a negative argument, so clamp it.
    startup_delay = max(startup_delay, 0)

    logger.info(f"Sequential prewarm dimulai dalam {startup_delay}s...")
    time.sleep(startup_delay)

    # Each getter only reads an attribute of the pipeline; the load itself is
    # whatever that attribute access triggers.
    targets = [
        ("ocr", lambda: pipeline.ocr),
        ("yolo", lambda: pipeline.yolo),
        ("captioner", lambda: pipeline.captioner),
    ]
    for name, getter in targets:
        # Skip components the readiness tracker already reports as ready.
        if readiness.get_status(name).state.value == "ready":
            logger.info(f" {name}: sudah ready, skip")
            continue
        try:
            readiness.mark_loading(name)
            t0 = time.perf_counter()
            logger.info(f" loading {name}...")
            _ = getter()
            elapsed = time.perf_counter() - t0
            readiness.mark_ready(name)
            logger.info(f" {name} ready ({elapsed:.1f}s)")
        except Exception as e:
            # Top-level boundary of the background thread: record the failure
            # and carry on with the next component.
            readiness.mark_error(name, str(e))
            logger.error(f" {name} failed: {e}")

    snap = readiness.snapshot()
    logger.info(f"Prewarm selesai. State: {snap['overall']}")
@app.on_event("startup")
async def startup():
    """
    Log startup information and kick off the background prewarm thread.

    The CV_PREWARM environment variable (default "true") controls prewarming.
    After trimming whitespace and lower-casing, any of "false", "0", "no" or
    "off" disables it, leaving every component to load on first use. Any
    other value starts _sequential_prewarm in a daemon thread so this handler
    returns without waiting for the loads.

    NOTE(review): FastAPI documents on_event as deprecated in favour of
    lifespan handlers; migrating would also require changing how the app
    object is constructed.
    """
    logger.info("CV Pipeline API starting up...")
    logger.info(f"Docs: http://{settings.api_host}:{settings.api_port}/docs")

    prewarm = os.environ.get("CV_PREWARM", "true").strip().lower()
    if prewarm in {"false", "0", "no", "off"}:
        logger.info("CV_PREWARM=false — pure lazy-load mode.")
        return

    logger.info("Starting sequential background prewarm...")
    # Daemon thread: it must not keep the process alive at shutdown.
    thread = threading.Thread(
        target=_sequential_prewarm,
        daemon=True,
        name="cv-prewarm-sequential",
    )
    thread.start()