Spaces:

ZienabM
/

ocr

Sleeping

App Files Files Community

ZienabM committed 27 days ago

Commit

c8689fc

verified ·

1 Parent(s): 1c89141

Upload 3 files

Browse files

Files changed (3) hide show

Dockerfile +21 -0
app.py +225 -0
requirements.txt +13 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,21 @@

+# Use a plain Python base image instead of a CUDA image (CPU-only deployment).
+FROM python:3.11-slim
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1 \
+    HF_HOME=/app/.cache/huggingface \
+    TORCH_HOME=/app/.cache/torch
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git curl \
+    && rm -rf /var/lib/apt/lists/*
+# Hugging Face Spaces runs the container as UID 1000. Create that user and
+# hand it /app (including the cache directories named in HF_HOME/TORCH_HOME)
+# so the model download at startup can write its cache.
+RUN useradd -m -u 1000 user \
+    && mkdir -p /app/.cache/huggingface /app/.cache/torch \
+    && chown -R user:user /app
+WORKDIR /app
+COPY --chown=user requirements.txt .
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir --upgrade pip && pip install --no-cache-dir -r requirements.txt
+USER user
+COPY --chown=user app.py .
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "300"]

app.py ADDED Viewed

	@@ -0,0 +1,225 @@

+"""
+DeepSeek-OCR-2 API — HuggingFace Spaces
+========================================
+POST /ocr
+  - image: file upload (jpg/png)
+  - x, y, w, h: optional crop box (pixels). If omitted → full image OCR.
+  - mode: "free" | "markdown" (default: free)
+Returns: { "text": "...", "mode": "...", "cropped": bool }
+"""
+import os
+import io
+import base64
+import tempfile
+import logging
+from contextlib import asynccontextmanager
+from typing import Optional
+import torch
+from PIL import Image
+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from transformers import AutoModel, AutoTokenizer
+# ─── Logging ──────────────────────────────────────────────────────────────────
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("ocr-api")
+# ─── Model globals ────────────────────────────────────────────────────────────
+MODEL_NAME = "deepseek-ai/DeepSeek-OCR-2"
+model = None
+tokenizer = None
+PROMPTS = {
+    "markdown": "<image>\n<|grounding|>Convert the document to markdown. ",
+    "free":     "<image>\nFree OCR. ",
+}
+# ─── Lifespan: load model once at startup ─────────────────────────────────────
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    global model, tokenizer
+    log.info("Loading DeepSeek-OCR-2 …")
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_NAME, trust_remote_code=True
+    )
+    attn_impl = "eager"
+    dtype     = torch.float32
+    model = AutoModel.from_pretrained(
+        MODEL_NAME,
+        _attn_implementation=attn_impl,
+        trust_remote_code=True,
+        use_safetensors=True,
+        torch_dtype=dtype,
+    )
+    model.eval()
+    # لا .cuda() على CPU
+    log.info("Model ready ✓  (device=cpu)")
+    yield
+    del model, tokenizer
+# ─── App ──────────────────────────────────────────────────────────────────────
+app = FastAPI(
+    title="DeepSeek-OCR-2 API",
+    description="Extract text from image regions using DeepSeek-OCR-2",
+    version="1.0.0",
+    lifespan=lifespan,
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],          # restrict to your domain in production
+    allow_methods=["POST", "GET"],
+    allow_headers=["*"],
+)
+# ─── Health ───────────────────────────────────────────────────────────────────
+@app.get("/")
+async def root():
+    return {
+        "status": "ok",
+        "model": MODEL_NAME,
+        "device": "cuda" if torch.cuda.is_available() else "cpu",
+        "endpoints": {
+            "POST /ocr":           "Extract text from image / crop region",
+            "POST /ocr/base64":    "Same but image sent as base64 JSON",
+            "GET  /health":        "Health check",
+        },
+    }
+@app.get("/health")
+async def health():
+    return {"status": "ok", "model_loaded": model is not None}
+# ─── Helper ───────────────────────────────────────────────────────────────────
+def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
+    """Run model inference on a PIL image, return text string."""
+    prompt = PROMPTS.get(mode, PROMPTS["free"])
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+        tmp_path = tmp.name
+        pil_image.save(tmp_path, format="PNG")
+    with tempfile.TemporaryDirectory() as out_dir:
+        result = model.infer(
+            tokenizer,
+            prompt=prompt,
+            image_file=tmp_path,
+            output_path=out_dir,
+            base_size=1024,
+            image_size=768,
+            crop_mode=True,
+            save_results=False,
+        )
+    os.unlink(tmp_path)
+    # result may be a string or a dict; normalise
+    if isinstance(result, dict):
+        return result.get("text", str(result))
+    return str(result) if result else ""
+def crop_image(img: Image.Image, x: int, y: int, w: int, h: int) -> Image.Image:
+    """Crop image; clamp to image bounds."""
+    iw, ih = img.size
+    x1 = max(0, x)
+    y1 = max(0, y)
+    x2 = min(iw, x + w)
+    y2 = min(ih, y + h)
+    if x2 <= x1 or y2 <= y1:
+        raise ValueError(f"Invalid crop box: x={x} y={y} w={w} h={h} (image {iw}×{ih})")
+    return img.crop((x1, y1, x2, y2))
+# ─── Main endpoint: file upload ───────────────────────────────────────────────
+@app.post("/ocr")
+async def ocr_file(
+    image: UploadFile = File(..., description="Image file (JPEG/PNG/WEBP)"),
+    x:     Optional[int] = Form(None, description="Crop left   (px)"),
+    y:     Optional[int] = Form(None, description="Crop top    (px)"),
+    w:     Optional[int] = Form(None, description="Crop width  (px)"),
+    h:     Optional[int] = Form(None, description="Crop height (px)"),
+    mode:  str           = Form("free", description="'free' or 'markdown'"),
+):
+    if model is None:
+        raise HTTPException(503, "Model not loaded yet — try again in a moment")
+    # read image
+    data = await image.read()
+    try:
+        pil_img = Image.open(io.BytesIO(data)).convert("RGB")
+    except Exception as e:
+        raise HTTPException(400, f"Cannot decode image: {e}")
+    cropped = False
+    # crop if bbox provided
+    if all(v is not None for v in [x, y, w, h]):
+        try:
+            pil_img = crop_image(pil_img, x, y, w, h)
+            cropped = True
+        except ValueError as e:
+            raise HTTPException(400, str(e))
+    try:
+        text = run_ocr(pil_img, mode=mode)
+    except Exception as e:
+        log.exception("OCR inference error")
+        raise HTTPException(500, f"OCR failed: {e}")
+    return JSONResponse({
+        "text":    text,
+        "mode":    mode,
+        "cropped": cropped,
+        "bbox":    {"x": x, "y": y, "w": w, "h": h} if cropped else None,
+    })
+# ─── Alternative endpoint: base64 JSON body ───────────────────────────────────
+from pydantic import BaseModel
+# JSON request body accepted by POST /ocr/base64.
+class OCRRequest(BaseModel):
+    image_b64: str            # base64-encoded image bytes
+    # Optional crop box in pixels; the crop is applied only when all four
+    # values are provided.
+    x: Optional[int] = None
+    y: Optional[int] = None
+    w: Optional[int] = None
+    h: Optional[int] = None
+    # Key into PROMPTS ("free" or "markdown").
+    mode: str = "free"
+@app.post("/ocr/base64")
+async def ocr_base64(req: OCRRequest):
+    if model is None:
+        raise HTTPException(503, "Model not loaded yet")
+    try:
+        raw = base64.b64decode(req.image_b64)
+        pil_img = Image.open(io.BytesIO(raw)).convert("RGB")
+    except Exception as e:
+        raise HTTPException(400, f"Cannot decode base64 image: {e}")
+    cropped = False
+    if all(v is not None for v in [req.x, req.y, req.w, req.h]):
+        try:
+            pil_img = crop_image(pil_img, req.x, req.y, req.w, req.h)
+            cropped = True
+        except ValueError as e:
+            raise HTTPException(400, str(e))
+    try:
+        text = run_ocr(pil_img, mode=req.mode)
+    except Exception as e:
+        log.exception("OCR inference error")
+        raise HTTPException(500, f"OCR failed: {e}")
+    return JSONResponse({
+        "text":    text,
+        "mode":    req.mode,
+        "cropped": cropped,
+        "bbox":    {"x": req.x, "y": req.y, "w": req.w, "h": req.h} if cropped else None,
+    })

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+# Web framework and ASGI server (started by the Dockerfile's uvicorn command).
+fastapi>=0.111.0
+uvicorn[standard]>=0.29.0
+# Required by FastAPI to parse the multipart form upload on POST /ocr.
+python-multipart>=0.0.9
+pillow>=10.0.0
+torch>=2.6.0
+# NOTE(review): exact pins — presumably the versions the model's remote code
+# expects; confirm before changing them.
+transformers==4.46.3
+tokenizers==0.20.3
+# NOTE(review): not imported by app.py — presumably needed by the model code
+# loaded with trust_remote_code; confirm.
+einops
+addict
+easydict
+pydantic>=2.0.0
+huggingface_hub>=0.23.0
+accelerate>=0.30.0