Spaces:
Sleeping
Sleeping
| """ | |
| main.py — FastAPI server for zai-org/GLM-OCR | |
| Endpoints: | |
| GET / — Serves the frontend HTML | |
| GET /health — Liveness probe + model info | |
| POST /ocr — Run OCR on uploaded image | |
| GET /metrics — Session-level stats | |
| """ | |
| import logging | |
| import time | |
| from contextlib import asynccontextmanager | |
| from pathlib import Path | |
| import uvicorn | |
| from fastapi import FastAPI, File, Form, HTTPException, UploadFile, Request | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import FileResponse, JSONResponse | |
| from pydantic import BaseModel | |
| from typing import Annotated | |
| from ocr_engine import engine, OcrResult, OcrMode | |
# ── Logging ──────────────────────────────────────────────────────────────────
# Configure the root logger once at import time: INFO level, time-of-day
# timestamps, and a left-aligned 8-character level column.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)-8s | %(name)s — %(message)s",
    datefmt="%H:%M:%S",
)

# Module-scoped logger for everything emitted from this file.
logger = logging.getLogger(__name__)
# ── Session metrics ──────────────────────────────────────────────────────────
class SessionMetrics:
    """In-memory counters describing OCR activity since this object was created.

    The values are plain instance attributes, so they start from zero every
    time the process (and therefore the module-level instance) is recreated.
    """

    def __init__(self) -> None:
        self.total_requests = 0  # number of results passed to record()
        self.total_words = 0
        self.total_chars = 0
        self.total_ms = 0.0  # summed latency of recorded results
        self.errors = 0  # not touched by record(); callers bump it directly
        self.started_at = time.time()

    def record(self, result: "OcrResult") -> None:
        """Fold one OCR result into the running totals.

        Args:
            result: Object exposing ``word_count``, ``char_count`` and
                ``latency_ms``.
        """
        self.total_requests += 1
        self.total_words += result.word_count
        self.total_chars += result.char_count
        self.total_ms += result.latency_ms

    def to_dict(self) -> dict:
        """Return a snapshot of the counters as a JSON-friendly dict.

        Returns:
            Totals, the mean latency per recorded request (``0.0`` when nothing
            has been recorded yet), the error count, and seconds elapsed since
            construction.
        """
        # Always a float so the JSON type of avg_latency_ms does not flip
        # between integer and number depending on traffic.
        if self.total_requests:
            avg = self.total_ms / self.total_requests
        else:
            avg = 0.0
        return {
            "total_requests": self.total_requests,
            "total_words_extracted": self.total_words,
            "total_chars_extracted": self.total_chars,
            "avg_latency_ms": round(avg, 1),
            "error_count": self.errors,
            "uptime_seconds": round(time.time() - self.started_at, 1),
        }


# Single shared instance for this process.
metrics = SessionMetrics()
# ── Lifespan ─────────────────────────────────────────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the OCR model before serving and release it on shutdown.

    Handed to ``FastAPI(lifespan=...)``: the code before ``yield`` runs once
    at startup, the code after it once at shutdown.

    Args:
        app: The application being started (not used by this function).
    """
    logger.info("Starting up — loading GLM-OCR model …")
    engine.load()
    logger.info("Model ready.")
    try:
        yield
    finally:
        # Unload even when the serving phase ends with an exception, so the
        # model is not left loaded on an abnormal shutdown.
        logger.info("Shutting down …")
        engine.unload()
# ── App ──────────────────────────────────────────────────────────────────────
# The application object; `lifespan` is the startup/shutdown hook defined above.
app = FastAPI(
    lifespan=lifespan,
    title="GLM-OCR API",
    version="1.0.0",
    description="Self-hosted OCR backend powered by zai-org/GLM-OCR",
)

# CORS policy: any origin and any request header, restricted to GET and POST.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["GET", "POST"],
    allow_origins=["*"],
)
# ── Schemas ──────────────────────────────────────────────────────────────────
class OcrResponse(BaseModel):
    """Response body of the OCR endpoint.

    Apart from ``success``, each field is copied from the same-named attribute
    of the engine's result object.
    """

    success: bool
    text: str
    word_count: int
    char_count: int
    latency_ms: float
    mode: str
    model_id: str
    device: str
# ── Routes ───────────────────────────────────────────────────────────────────
@app.get("/")
async def serve_frontend():
    """Serve the frontend page stored at ``frontend/index.html`` next to this file.

    Returns:
        A ``FileResponse`` for the HTML file, or a JSON body with status 404
        when no such file exists.
    """
    frontend = Path(__file__).parent / "frontend" / "index.html"
    # is_file() rather than exists(): a directory with that name must not be
    # handed to FileResponse.
    if not frontend.is_file():
        return JSONResponse({"message": "Frontend not found."}, status_code=404)
    return FileResponse(str(frontend))
@app.get("/health")
async def health():
    """Liveness probe.

    Returns:
        A dict with ``status`` (``"ok"`` once ``engine.loaded`` is truthy,
        otherwise ``"loading"``) and ``model`` (the value of ``engine.info``).
    """
    return {
        "status": "ok" if engine.loaded else "loading",
        "model": engine.info,
    }
# Upload constraints enforced by run_ocr.
_ALLOWED_CONTENT_TYPES = frozenset(
    {"image/png", "image/jpeg", "image/webp", "image/gif", "image/bmp", "image/tiff"}
)
_MAX_UPLOAD_BYTES = 20 * 1024 * 1024  # 20 MB


# Developer notes (kept out of the docstring, which FastAPI publishes as the
# endpoint description):
#   415 - declared content type is not in _ALLOWED_CONTENT_TYPES
#   400 - upload is empty
#   413 - upload exceeds _MAX_UPLOAD_BYTES
#   422 - engine.run raised ValueError
#   500 - engine.run raised anything else
# Every engine failure also increments metrics.errors.
@app.post("/ocr", response_model=OcrResponse)
async def run_ocr(
    file: Annotated[UploadFile, File(description="Image file (PNG, JPG, WEBP, BMP, TIFF)")],
    mode: Annotated[OcrMode, Form(description="'recognize' for plain text · 'parse' for structured markdown")] = "recognize",
):
    """
    Run GLM-OCR on an uploaded image.

    **mode options:**
    - `recognize` — extracts raw text, preserves layout (default)
    - `parse` — returns structured markdown (headers, tables, lists)
    """
    # The content type is client-supplied; an absent one is let through.
    if file.content_type and file.content_type not in _ALLOWED_CONTENT_TYPES:
        raise HTTPException(status_code=415, detail=f"Unsupported file type: {file.content_type}")

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without copying an arbitrarily large body into memory.
    image_bytes = await file.read(_MAX_UPLOAD_BYTES + 1)
    if not image_bytes:
        raise HTTPException(status_code=400, detail="Empty file.")
    if len(image_bytes) > _MAX_UPLOAD_BYTES:
        raise HTTPException(status_code=413, detail="File too large. Max 20 MB.")

    logger.info(f"OCR | file={file.filename} size={len(image_bytes)/1024:.1f}KB mode={mode}")

    # NOTE(review): engine.run is called synchronously inside an async handler;
    # if it blocks for long, other requests wait. Moving it to a worker thread
    # would need the engine's thread-safety confirmed first.
    try:
        result = engine.run(image_bytes, mode=mode)
    except ValueError as e:
        metrics.errors += 1
        raise HTTPException(status_code=422, detail=str(e)) from e
    except Exception as e:
        metrics.errors += 1
        logger.exception("Inference error")
        raise HTTPException(status_code=500, detail=f"Inference failed: {e}") from e

    metrics.record(result)
    logger.info(f"Done | {result.word_count} words | {result.latency_ms:.0f}ms")
    return OcrResponse(
        success=True,
        text=result.text,
        word_count=result.word_count,
        char_count=result.char_count,
        latency_ms=result.latency_ms,
        mode=result.mode,
        model_id=result.model_id,
        device=result.device,
    )
@app.get("/metrics")
async def get_metrics():
    """Return the session counters held by the module-level ``metrics`` object.

    Returns:
        The dict produced by ``metrics.to_dict()``.
    """
    return metrics.to_dict()
@app.exception_handler(Exception)
async def global_handler(request: Request, exc: Exception):
    """Catch-all for exceptions no route handled: log it, answer with a generic 500.

    Args:
        request: The request being processed when the exception escaped.
        exc: The unhandled exception.

    Returns:
        A JSON response with status 500 and a fixed detail message; the
        exception text is not included in the response body.
    """
    # Pass the exception explicitly so the traceback is logged whether or not
    # this runs inside an active ``except`` block.
    logger.error(f"Unhandled: {request.url}", exc_info=exc)
    return JSONResponse(status_code=500, content={"detail": "Internal server error"})
# Script entry point: start uvicorn directly when this file is executed.
if __name__ == "__main__":
    uvicorn.run(
        "main:app",
        host="0.0.0.0",  # bind every interface
        port=8000,
        reload=False,
    )