# tfg-api / app2.py
# asierfg794's picture
# EasyOCR 2 readers (lat, cir)
# 5b1379c
"""
app.py — FastAPI + PaddleOCR zerbitzaria
Hugging Face Spaces-en exekutatzen da (CPU).
Detekzio-estrategia: konfiantza-puntuazio altuena duen scripta hautatu.
"""
import io
import logging
from contextlib import asynccontextmanager
import numpy as np
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from paddleocr import PaddleOCR
from PIL import Image
# Configure the root logger at import time with the INFO threshold.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module; used by every handler below.
logger = logging.getLogger(__name__)
def _make_ocr(lang: str) -> PaddleOCR:
    """Build a PaddleOCR instance for one language/script code.

    Args:
        lang: Language code forwarded unchanged as PaddleOCR's ``lang``
            argument.

    Returns:
        A new ``PaddleOCR`` object created with ``use_angle_cls=True``,
        ``use_gpu=False`` and ``show_log=False``.
    """
    options = {
        "use_angle_cls": True,
        "lang": lang,
        "use_gpu": False,
        "show_log": False,
    }
    return PaddleOCR(**options)
def _extract_with_score(ocr: PaddleOCR, img_array: np.ndarray) -> tuple[str, float]:
    """Run one OCR engine on an image and return its text and mean confidence.

    Args:
        ocr: Engine exposing ``ocr(image, cls=True)``.
        img_array: Image data, passed to the engine unchanged.

    Returns:
        ``(text, score)``. ``text`` is the recognised lines joined with
        newlines; ``score`` is the arithmetic mean of the per-line
        confidences as a built-in ``float`` (higher means more certain;
        the engine is expected to report values in [0, 1]).
        ``("", 0.0)`` is returned when nothing is recognised or when the
        engine or result parsing raises; this helper is best-effort and
        never propagates an exception.
    """
    try:
        result = ocr.ocr(img_array, cls=True)
        # The first element holds the lines of the (single) input image.
        if not result or result[0] is None:
            return "", 0.0

        texts: list[str] = []
        scores: list[float] = []
        for line in result[0]:
            # Each entry is read as (box, (text, score)); skip empty ones.
            if line and line[1]:
                text, score = line[1][0], line[1][1]
                texts.append(text)
                # Coerce to a built-in float: a NumPy scalar (e.g.
                # np.float32) would otherwise leak into the JSON response
                # and fail to serialise.
                scores.append(float(score))

        if not scores:
            return "", 0.0
        return "\n".join(texts), sum(scores) / len(scores)
    except Exception as e:
        # Deliberate best-effort: a failing engine simply loses the vote.
        logger.warning("OCR errorea: %s", e)
        return "", 0.0
# Loaded OCR engines keyed by script name; filled at startup, emptied at shutdown.
ocrs: dict = {}

# Script name exposed by this API -> language code handed to PaddleOCR.
SCRIPTS = {
    "latin": "latin",            # es, en, de, fr, lt, pl...
    "chinese": "ch",             # Simplified Chinese
    "chinese_t": "chinese_cht",  # Traditional Chinese
    "japanese": "japan",         # Japanese
    "korean": "korean",          # Korean
    "arabic": "arabic",          # Arabic, Persian, Urdu
    "cyrillic": "cyrillic",      # Russian, Ukrainian...
    "devanagari": "devanagari",  # Hindi, Nepali
}
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load one OCR engine per configured script at startup; release at shutdown.

    Args:
        app: The FastAPI application this lifespan is attached to (not
            used in the body).

    Yields:
        Nothing; control returns to the framework while the app serves.

    The registry is cleared in a ``finally`` so it is emptied even when
    loading fails part-way or an exception is raised at the ``yield``.
    """
    # No ``global`` needed: ``ocrs`` is only mutated, never rebound.
    try:
        for name, lang in SCRIPTS.items():
            logger.info("Kargatzen: %s (%s)...", name, lang)
            ocrs[name] = _make_ocr(lang)
        logger.info("Instantzia guztiak prest.")
        yield
    finally:
        ocrs.clear()
# ASGI application; engines are loaded and released through ``lifespan``.
app = FastAPI(
    title="OCR API",
    description="PaddleOCR-en oinarritutako OCR zerbitzua (script anitz)",
    version="5.0.0",
    lifespan=lifespan,
)

# CORS: any origin and any header are accepted, for GET and POST only.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["POST", "GET"],
    allow_headers=["*"],
)
@app.get("/")
async def health_check():
    """Return a static status payload with the engine label and script names."""
    script_names = [*SCRIPTS]
    return {"status": "ok", "engine": "PaddleOCR 2.7.3", "scripts": script_names}
@app.post("/predict")
async def predict(image: UploadFile = File(...)):
    """Run every loaded script engine on the upload and return the best result.

    The engine whose result has the highest mean confidence wins; on a tie
    the first engine in registry order is kept.

    Args:
        image: Uploaded image file (multipart form field ``image``).

    Returns:
        JSON body ``{"text": ..., "script": ..., "confidence": ...}`` with
        the confidence rounded to three decimals.

    Raises:
        HTTPException: 503 when no engine is loaded; 400 when the upload
            cannot be decoded as an image.
    """
    if not ocrs:
        raise HTTPException(status_code=503, detail="OCR ez dago prest.")

    contents = await image.read()
    try:
        pil_image = Image.open(io.BytesIO(contents)).convert("RGB")
    except Exception as exc:
        # Chain the decoding error so the real cause stays in tracebacks.
        raise HTTPException(status_code=400, detail="Irudi baliogabea.") from exc

    logger.info("Irudia jasota: %s (%dx%d)", image.filename, *pil_image.size)

    # NOTE(review): the array is in RGB channel order; confirm that this is
    # the order the OCR engine expects for ndarray input.
    img_array = np.array(pil_image)

    # NOTE(review): the engines run synchronously inside this coroutine, so
    # the event loop is presumably blocked for the duration; confirm this is
    # acceptable before moving the work to a thread.
    candidates = {
        name: _extract_with_score(ocr, img_array)
        for name, ocr in ocrs.items()
    }

    # Pick the script with the highest mean confidence.
    best_script, (best_text, best_score) = max(
        candidates.items(), key=lambda item: item[1][1]
    )
    logger.info(
        "Script hautatu: %s | Konfiantza: %.3f | Karaktereak: %d",
        best_script, best_score, len(best_text),
    )

    return JSONResponse(content={
        "text": best_text,
        "script": best_script,
        # Built-in float guarantees the value is JSON-serialisable even if
        # the score arrived as a NumPy scalar.
        "confidence": round(float(best_score), 3),
    })