Spaces:

Sam20202
/

GLMOCR_Text_extraction

Sleeping

App Files Files Community

GLMOCR_Text_extraction / main.py

Sam20202

Initial deploy

0533780 about 1 month ago

raw

history blame contribute delete

6.57 kB

	"""
	main.py — FastAPI server for zai-org/GLM-OCR

	Endpoints:
	GET / → Serves the frontend HTML
	GET /health → Liveness probe + model info
	POST /ocr → Run OCR on uploaded image
	GET /metrics → Session-level stats
	"""

	import logging
	import time
	from contextlib import asynccontextmanager
	from pathlib import Path

	import uvicorn
	from fastapi import FastAPI, File, Form, HTTPException, UploadFile, Request
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import FileResponse, JSONResponse
	from pydantic import BaseModel
	from typing import Annotated

	from ocr_engine import engine, OcrResult, OcrMode

	# ── Logging ─────────────────────────────────────────────────────────────────

	# Configure root logging once at import time: INFO level and a
	# "time | LEVEL | logger-name — message" line layout.
	logging.basicConfig(
	    level=logging.INFO,
	    # Plain "|" separators. The previous "\|" is not a valid string escape:
	    # it triggers a SyntaxWarning on recent Python versions and emitted a
	    # literal backslash in every log line.
	    format="%(asctime)s | %(levelname)-8s | %(name)s — %(message)s",
	    datefmt="%H:%M:%S",
	)
	# Module-level logger used by every route and hook in this file.
	logger = logging.getLogger(__name__)

	# ── Session metrics ─────────────────────────────────────────────────────────

	class SessionMetrics:
	    """Running totals for the OCR requests recorded on this object.

	    Counters start at zero when the object is created. ``errors`` is not
	    touched by any method here; callers increment it directly.
	    """

	    def __init__(self):
	        self.total_requests = self.total_words = self.total_chars = 0
	        self.total_ms = 0.0
	        self.errors = 0
	        # Creation timestamp; to_dict() reports uptime relative to it.
	        self.started_at = time.time()

	    def record(self, result: OcrResult):
	        """Add one result's word count, char count and latency to the totals."""
	        self.total_requests = self.total_requests + 1
	        self.total_words = self.total_words + result.word_count
	        self.total_chars = self.total_chars + result.char_count
	        self.total_ms = self.total_ms + result.latency_ms

	    def to_dict(self) -> dict:
	        """Return the current counters, mean latency and uptime as a plain dict."""
	        if self.total_requests:
	            mean_latency = self.total_ms / self.total_requests
	        else:
	            # Nothing recorded yet: report 0 rather than dividing by zero.
	            mean_latency = 0
	        elapsed = time.time() - self.started_at
	        return dict(
	            total_requests=self.total_requests,
	            total_words_extracted=self.total_words,
	            total_chars_extracted=self.total_chars,
	            avg_latency_ms=round(mean_latency, 1),
	            error_count=self.errors,
	            uptime_seconds=round(elapsed, 1),
	        )


	# Single shared instance used by the routes in this module.
	metrics = SessionMetrics()

	# ── Lifespan ─────────────────────────────────────────────────────────────────

	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """Load the OCR engine before the app starts serving and unload it on exit.

	    Args:
	        app: The FastAPI application this lifespan is attached to (not used
	            inside the body).

	    Yields:
	        None, once ``engine.load()`` has returned.
	    """
	    logger.info("🚀 Starting up — loading GLM-OCR model …")
	    engine.load()
	    logger.info("✅ Model ready.")
	    try:
	        yield
	    finally:
	        # Run cleanup even if an exception or cancellation is thrown into the
	        # generator at the yield; otherwise the engine would never be unloaded.
	        logger.info("🛑 Shutting down …")
	        engine.unload()

	# ── App ──────────────────────────────────────────────────────────────────────

	# Application object; startup and shutdown work is delegated to `lifespan`.
	app = FastAPI(
	    lifespan=lifespan,
	    title="GLM-OCR API",
	    version="1.0.0",
	    description="Self-hosted OCR backend powered by zai-org/GLM-OCR",
	)

	# CORS policy: any origin and any request header, GET and POST methods only.
	app.add_middleware(
	    CORSMiddleware,
	    allow_headers=["*"],
	    allow_methods=["GET", "POST"],
	    allow_origins=["*"],
	)

	# ── Schemas ───────────────────────────────────────────────────────────────────

	# Response body of POST /ocr (declared as that route's response_model).
	# Apart from `success`, run_ocr fills every field from the same-named
	# attribute of the result returned by engine.run().
	class OcrResponse(BaseModel):
	    success: bool  # run_ocr always passes True here
	    text: str  # the extracted text
	    word_count: int
	    char_count: int
	    latency_ms: float  # presumably milliseconds, going by the name — confirm in ocr_engine
	    mode: str  # taken from the result, not directly from the request's `mode` field
	    model_id: str
	    device: str

	# ── Routes ────────────────────────────────────────────────────────────────────

	@app.get("/", include_in_schema=False)
	async def serve_frontend():
	    """Serve ``frontend/index.html`` from beside this module, or a JSON 404 if absent."""
	    index_page = Path(__file__).parent.joinpath("frontend", "index.html")
	    if index_page.exists():
	        return FileResponse(str(index_page))
	    return JSONResponse({"message": "Frontend not found."}, 404)


	# Liveness probe: status is "ok" once engine.loaded is truthy and "loading"
	# otherwise; engine.info is passed through unchanged under "model".
	@app.get("/health")
	async def health():
	    status = "loading"
	    if engine.loaded:
	        status = "ok"
	    return {"status": status, "model": engine.info}


	# Content types accepted by POST /ocr; built once instead of on every request.
	_ALLOWED_CONTENT_TYPES = frozenset({
	    "image/png",
	    "image/jpeg",
	    "image/webp",
	    "image/gif",
	    "image/bmp",
	    "image/tiff",
	})

	# Upload size ceiling in bytes (20 MB), matching the 413 error message.
	_MAX_UPLOAD_BYTES = 20 * 1024 * 1024


	@app.post("/ocr", response_model=OcrResponse)
	async def run_ocr(
	    file: Annotated[UploadFile, File(description="Image file (PNG, JPG, WEBP, BMP, TIFF)")],
	    mode: Annotated[OcrMode, Form(description="'recognize' for plain text · 'parse' for structured markdown")] = "recognize",
	):
	    """
	    Run GLM-OCR on an uploaded image.

	    mode options:
	    - `recognize` — extracts raw text, preserves layout (default)
	    - `parse` — returns structured markdown (headers, tables, lists)

	    error responses:
	    - `415` — the upload declares a content type that is not an accepted image type
	    - `400` — the upload is empty
	    - `413` — the upload is larger than 20 MB
	    - `422` — the OCR engine rejected the input
	    - `500` — any other failure during inference
	    """
	    # A missing content type is let through; only a declared, unsupported
	    # type is rejected.
	    if file.content_type and file.content_type not in _ALLOWED_CONTENT_TYPES:
	        raise HTTPException(status_code=415, detail=f"Unsupported file type: {file.content_type}")

	    # Read at most one byte past the limit: enough to detect an oversized
	    # upload without loading an arbitrarily large body into memory.
	    image_bytes = await file.read(_MAX_UPLOAD_BYTES + 1)
	    if not image_bytes:
	        raise HTTPException(status_code=400, detail="Empty file.")
	    if len(image_bytes) > _MAX_UPLOAD_BYTES:
	        raise HTTPException(status_code=413, detail="File too large. Max 20 MB.")

	    # %r on the client-supplied filename keeps embedded newlines and control
	    # characters from forging extra log lines.
	    logger.info(
	        "OCR | file=%r size=%.1fKB mode=%s",
	        file.filename,
	        len(image_bytes) / 1024,
	        mode,
	    )

	    try:
	        # NOTE(review): engine.run is a synchronous call inside a coroutine, so
	        # the event loop is occupied until it returns. Moving it to a worker
	        # thread would need confirmation that the engine is thread-safe.
	        result = engine.run(image_bytes, mode=mode)
	    except ValueError as e:
	        # The engine signalled bad input: count it and report 422.
	        metrics.errors += 1
	        raise HTTPException(status_code=422, detail=str(e)) from e
	    except Exception as e:
	        # Route-level boundary: log the traceback, count it, report 500.
	        metrics.errors += 1
	        logger.exception("Inference error")
	        raise HTTPException(status_code=500, detail=f"Inference failed: {e}") from e

	    metrics.record(result)
	    logger.info("Done | %s words | %.0fms", result.word_count, result.latency_ms)

	    return OcrResponse(
	        success=True,
	        text=result.text,
	        word_count=result.word_count,
	        char_count=result.char_count,
	        latency_ms=result.latency_ms,
	        mode=result.mode,
	        model_id=result.model_id,
	        device=result.device,
	    )


	# Session-level stats: returns whatever the shared `metrics` object reports
	# from to_dict().
	@app.get("/metrics")
	async def get_metrics():
	    snapshot = metrics.to_dict()
	    return snapshot


	@app.exception_handler(Exception)
	async def global_handler(request: Request, exc: Exception):
	    """Log an otherwise-unhandled exception and answer with a generic JSON 500.

	    Args:
	        request: The request being processed when the exception escaped.
	        exc: The exception that was not handled elsewhere.

	    Returns:
	        A JSONResponse with status 500 and a fixed detail message; the
	        exception text is not included in the body.
	    """
	    # Pass the exception explicitly so the traceback is logged even if no
	    # exception is "currently being handled" when this handler runs.
	    logger.error("Unhandled: %s", request.url, exc_info=exc)
	    return JSONResponse(status_code=500, content={"detail": "Internal server error"})


	if __name__ == "__main__":
	    # Direct-execution entry point: serve main:app on all interfaces,
	    # port 8000, with auto-reload off.
	    uvicorn.run(
	        "main:app",
	        port=8000,
	        host="0.0.0.0",
	        reload=False,
	    )