""" main.py — FastAPI server for zai-org/GLM-OCR Endpoints: GET / → Serves the frontend HTML GET /health → Liveness probe + model info POST /ocr → Run OCR on uploaded image GET /metrics → Session-level stats """ import logging import time from contextlib import asynccontextmanager from pathlib import Path import uvicorn from fastapi import FastAPI, File, Form, HTTPException, UploadFile, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse, JSONResponse from pydantic import BaseModel from typing import Annotated from ocr_engine import engine, OcrResult, OcrMode # ── Logging ───────────────────────────────────────────────────────────────── logging.basicConfig( level=logging.INFO, format="%(asctime)s | %(levelname)-8s | %(name)s — %(message)s", datefmt="%H:%M:%S", ) logger = logging.getLogger(__name__) # ── Session metrics ───────────────────────────────────────────────────────── class SessionMetrics: def __init__(self): self.total_requests = 0 self.total_words = 0 self.total_chars = 0 self.total_ms = 0.0 self.errors = 0 self.started_at = time.time() def record(self, result: OcrResult): self.total_requests += 1 self.total_words += result.word_count self.total_chars += result.char_count self.total_ms += result.latency_ms def to_dict(self) -> dict: avg = self.total_ms / self.total_requests if self.total_requests else 0 return { "total_requests": self.total_requests, "total_words_extracted": self.total_words, "total_chars_extracted": self.total_chars, "avg_latency_ms": round(avg, 1), "error_count": self.errors, "uptime_seconds": round(time.time() - self.started_at, 1), } metrics = SessionMetrics() # ── Lifespan ───────────────────────────────────────────────────────────────── @asynccontextmanager async def lifespan(app: FastAPI): logger.info("🚀 Starting up — loading GLM-OCR model …") engine.load() logger.info("✅ Model ready.") yield logger.info("🛑 Shutting down …") engine.unload() # ── App ────────────────────────────────────────────────────────────────────── app = FastAPI( title="GLM-OCR API", description="Self-hosted OCR backend powered by zai-org/GLM-OCR", version="1.0.0", lifespan=lifespan, ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_methods=["GET", "POST"], allow_headers=["*"], ) # ── Schemas ─────────────────────────────────────────────────────────────────── class OcrResponse(BaseModel): success: bool text: str word_count: int char_count: int latency_ms: float mode: str model_id: str device: str # ── Routes ──────────────────────────────────────────────────────────────────── @app.get("/", include_in_schema=False) async def serve_frontend(): frontend = Path(__file__).parent / "frontend" / "index.html" if not frontend.exists(): return JSONResponse({"message": "Frontend not found."}, 404) return FileResponse(str(frontend)) @app.get("/health") async def health(): return { "status": "ok" if engine.loaded else "loading", "model": engine.info, } @app.post("/ocr", response_model=OcrResponse) async def run_ocr( file: Annotated[UploadFile, File(description="Image file (PNG, JPG, WEBP, BMP, TIFF)")], mode: Annotated[OcrMode, Form(description="'recognize' for plain text · 'parse' for structured markdown")] = "recognize", ): """ Run GLM-OCR on an uploaded image. **mode options:** - `recognize` — extracts raw text, preserves layout (default) - `parse` — returns structured markdown (headers, tables, lists) """ allowed = {"image/png", "image/jpeg", "image/webp", "image/gif", "image/bmp", "image/tiff"} if file.content_type and file.content_type not in allowed: raise HTTPException(status_code=415, detail=f"Unsupported file type: {file.content_type}") image_bytes = await file.read() if not image_bytes: raise HTTPException(status_code=400, detail="Empty file.") if len(image_bytes) > 20 * 1024 * 1024: raise HTTPException(status_code=413, detail="File too large. Max 20 MB.") logger.info(f"OCR | file={file.filename} size={len(image_bytes)/1024:.1f}KB mode={mode}") try: result = engine.run(image_bytes, mode=mode) except ValueError as e: metrics.errors += 1 raise HTTPException(status_code=422, detail=str(e)) except Exception as e: metrics.errors += 1 logger.exception("Inference error") raise HTTPException(status_code=500, detail=f"Inference failed: {e}") metrics.record(result) logger.info(f"Done | {result.word_count} words | {result.latency_ms:.0f}ms") return OcrResponse( success = True, text = result.text, word_count = result.word_count, char_count = result.char_count, latency_ms = result.latency_ms, mode = result.mode, model_id = result.model_id, device = result.device, ) @app.get("/metrics") async def get_metrics(): return metrics.to_dict() @app.exception_handler(Exception) async def global_handler(request: Request, exc: Exception): logger.exception(f"Unhandled: {request.url}") return JSONResponse(status_code=500, content={"detail": "Internal server error"}) if __name__ == "__main__": uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=False)