ZienabM committed on
Commit
c8689fc
Β·
verified Β·
1 Parent(s): 1c89141

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +21 -0
  2. app.py +225 -0
  3. requirements.txt +13 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a plain Python base image instead of a CUDA image (the app loads the
# model on CPU only).
FROM python:3.11-slim

# PIP_NO_CACHE_DIR keeps pip's download cache out of the image layers.
# HF_HOME / TORCH_HOME point the model caches at directories under /app.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch

RUN apt-get update && apt-get install -y --no-install-recommends \
        git curl \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Spaces starts the container as UID 1000, not root. Create that
# user and hand it /app (including the cache directories) so the model
# download at startup does not fail with a permission error.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /app/.cache/huggingface /app/.cache/torch \
    && chown -R user:user /app

WORKDIR /app

# Dependencies are installed system-wide (as root) before dropping privileges;
# copying requirements.txt first keeps this layer cached across app.py edits.
COPY requirements.txt .
RUN pip install --upgrade pip && pip install -r requirements.txt

COPY --chown=user:user app.py .

USER user

EXPOSE 7860
# A single worker: each worker would load its own copy of the model.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "300"]
app.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DeepSeek-OCR-2 API — HuggingFace Spaces
3
+ ========================================
4
+ POST /ocr
5
+ - image: file upload (jpg/png)
6
+ - x, y, w, h: optional crop box (pixels). If omitted → full image OCR.
7
+ - mode: "free" | "markdown" (default: free)
8
+
9
+ Returns: { "text": "...", "mode": "...", "cropped": bool }
10
+ """
11
+
12
+ import os
13
+ import io
14
+ import base64
15
+ import tempfile
16
+ import logging
17
+ from contextlib import asynccontextmanager
18
+ from typing import Optional
19
+
20
+ import torch
21
+ from PIL import Image
22
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException
23
+ from fastapi.middleware.cors import CORSMiddleware
24
+ from fastapi.responses import JSONResponse
25
+ from transformers import AutoModel, AutoTokenizer
26
+
27
# ─── Logging ──────────────────────────────────────────────────────────────────
# Root logger at INFO; every message from this module goes through `log`.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("ocr-api")

# ─── Model globals ────────────────────────────────────────────────────────────
# Hub identifier of the checkpoint that the lifespan hook loads.
MODEL_NAME = "deepseek-ai/DeepSeek-OCR-2"

# Both stay None until the lifespan hook assigns them; the endpoints check
# `model is None` before serving a request.
model = tokenizer = None

# Prompt text per OCR mode. run_ocr() falls back to the "free" entry for any
# mode that is not a key here.
PROMPTS = dict(
    markdown="<image>\n<|grounding|>Convert the document to markdown. ",
    free="<image>\nFree OCR. ",
)
40
+
41
+ # ─── Lifespan: load model once at startup ─────────────────────────────────────
42
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the tokenizer and model once at startup; release them at shutdown.

    Populates the module-level ``model`` and ``tokenizer`` globals before the
    application starts serving, and resets both to ``None`` when it stops.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    global model, tokenizer
    log.info("Loading DeepSeek-OCR-2 …")

    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME, trust_remote_code=True
    )

    # Settings chosen for CPU inference: eager attention and full precision.
    attn_impl = "eager"
    dtype = torch.float32

    model = AutoModel.from_pretrained(
        MODEL_NAME,
        _attn_implementation=attn_impl,
        trust_remote_code=True,
        use_safetensors=True,
        torch_dtype=dtype,
    )
    model.eval()
    # No .cuda() call: the model stays on the CPU.
    log.info("Model ready βœ“ (device=cpu)")
    try:
        yield
    finally:
        # Rebind to None instead of `del`: deleting the globals would turn
        # every later `model is None` check into a NameError.
        model = None
        tokenizer = None
66
+
67
+ # ─── App ──────────────────────────────────────────────────────────────────────
68
# The application object; `lifespan` loads the model before requests are served.
app = FastAPI(
    lifespan=lifespan,
    title="DeepSeek-OCR-2 API",
    description="Extract text from image regions using DeepSeek-OCR-2",
    version="1.0.0",
)

# CORS: every origin and header is accepted, but only POST and GET methods.
# Restrict `allow_origins` to your own domain in production.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["POST", "GET"],
    allow_origins=["*"],
)
81
+
82
+ # ─── Health ───────────────────────────────────────────────────────────────────
83
@app.get("/")
async def root():
    """Return a short service description.

    Returns:
        A dict with the service status, the model identifier, the compute
        device, and a map of the available endpoints.
    """
    # Report the device the loaded model actually lives on. The model is never
    # moved off the CPU at load time, so CUDA being available does not mean
    # inference runs on it. Fall back to the availability probe only when no
    # model (or no `device` attribute) is present.
    loaded_device = getattr(model, "device", None)
    if loaded_device is not None:
        device = str(getattr(loaded_device, "type", loaded_device))
    else:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    return {
        "status": "ok",
        "model": MODEL_NAME,
        "device": device,
        "endpoints": {
            "POST /ocr": "Extract text from image / crop region",
            "POST /ocr/base64": "Same but image sent as base64 JSON",
            "GET /health": "Health check",
        },
    }
95
+
96
@app.get("/health")
async def health():
    """Health check: report whether the module-level model has been assigned."""
    is_loaded = model is not None
    return {"status": "ok", "model_loaded": is_loaded}
99
+
100
+ # ─── Helper ───────────────────────────────────────────────────────────────────
101
def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
    """Run model inference on a PIL image and return the recognised text.

    The image is written as a PNG into a private temporary directory because
    ``model.infer`` is handed a file path. The directory, including the input
    image and the output folder, is removed when the call finishes, whether
    inference succeeds or raises.

    Args:
        pil_image: Image to run OCR on.
        mode: Key into ``PROMPTS``; an unknown value falls back to ``"free"``.

    Returns:
        The ``"text"`` entry when inference returns a dict (or the dict's
        string form if that key is missing), otherwise ``str(result)``;
        an empty string when the result is falsy.
    """
    prompt = PROMPTS.get(mode, PROMPTS["free"])

    # One temporary directory holds both the input image and the output
    # folder, so a failure in save() or infer() cannot leak a stray file.
    with tempfile.TemporaryDirectory() as work_dir:
        image_path = os.path.join(work_dir, "input.png")
        out_dir = os.path.join(work_dir, "out")
        os.mkdir(out_dir)
        pil_image.save(image_path, format="PNG")

        # NOTE(review): the upstream DeepSeek-OCR `infer` implementation
        # appears to return the decoded text only when called with
        # eval_mode=True and otherwise just streams it to stdout. Confirm
        # against the DeepSeek-OCR-2 remote code; if that holds here, this
        # function always returns "".
        result = model.infer(
            tokenizer,
            prompt=prompt,
            image_file=image_path,
            output_path=out_dir,
            base_size=1024,
            image_size=768,
            crop_mode=True,
            save_results=False,
        )

    # result may be a string or a dict; normalise
    if isinstance(result, dict):
        return result.get("text", str(result))
    return str(result) if result else ""
127
+
128
+
129
def crop_image(img: Image.Image, x: int, y: int, w: int, h: int) -> Image.Image:
    """Return the region of ``img`` covered by the box, clipped to the image.

    Args:
        img: Source image.
        x: Left edge of the box in pixels.
        y: Top edge of the box in pixels.
        w: Box width in pixels.
        h: Box height in pixels.

    Returns:
        The cropped image.

    Raises:
        ValueError: If the clipped box has no area.
    """
    iw, ih = img.size

    # Clip each edge to the image rectangle.
    left, top = max(x, 0), max(y, 0)
    right, bottom = min(x + w, iw), min(y + h, ih)

    has_area = right > left and bottom > top
    if not has_area:
        raise ValueError(f"Invalid crop box: x={x} y={y} w={w} h={h} (image {iw}Γ—{ih})")

    box = (left, top, right, bottom)
    return img.crop(box)
139
+
140
+
141
+ # ─── Main endpoint: file upload ───────────────────────────────────────────────
142
@app.post("/ocr")
async def ocr_file(
    image: UploadFile = File(..., description="Image file (JPEG/PNG/WEBP)"),
    x: Optional[int] = Form(None, description="Crop left (px)"),
    y: Optional[int] = Form(None, description="Crop top (px)"),
    w: Optional[int] = Form(None, description="Crop width (px)"),
    h: Optional[int] = Form(None, description="Crop height (px)"),
    mode: str = Form("free", description="'free' or 'markdown'"),
):
    """OCR an uploaded image, optionally restricted to a crop box.

    Args:
        image: Uploaded image file.
        x, y, w, h: Crop box in pixels. Supply all four to crop, or none to
            OCR the full image.
        mode: Prompt mode passed to ``run_ocr``.

    Returns:
        JSON with ``text``, ``mode``, ``cropped`` and ``bbox`` (``None`` when
        the image was not cropped).

    Raises:
        HTTPException: 503 if the model is not loaded; 400 for an incomplete
            or invalid crop box or an undecodable image; 500 if inference
            fails.
    """
    if model is None:
        raise HTTPException(503, "Model not loaded yet β€” try again in a moment")

    # A partially supplied box used to be ignored silently, returning text for
    # the whole image; reject it so the caller notices the mistake.
    supplied = [v is not None for v in (x, y, w, h)]
    wants_crop = all(supplied)
    if any(supplied) and not wants_crop:
        raise HTTPException(
            400, "Incomplete crop box: x, y, w and h must all be provided together"
        )

    # read image
    data = await image.read()
    try:
        pil_img = Image.open(io.BytesIO(data)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"Cannot decode image: {e}") from e

    cropped = False
    # crop if bbox provided
    if wants_crop:
        try:
            pil_img = crop_image(pil_img, x, y, w, h)
            cropped = True
        except ValueError as e:
            raise HTTPException(400, str(e)) from e

    try:
        text = run_ocr(pil_img, mode=mode)
    except Exception as e:
        log.exception("OCR inference error")
        raise HTTPException(500, f"OCR failed: {e}") from e

    return JSONResponse({
        "text": text,
        "mode": mode,
        "cropped": cropped,
        "bbox": {"x": x, "y": y, "w": w, "h": h} if cropped else None,
    })
182
+
183
+
184
+ # ─── Alternative endpoint: base64 JSON body ───────────────────────────────────
185
+ from pydantic import BaseModel
186
+
187
class OCRRequest(BaseModel):
    """JSON request body for the base64 OCR endpoint."""

    # Base64-encoded bytes of the image file.
    image_b64: str

    # Optional crop box in pixels: left, top, width, height.
    x: Optional[int] = None
    y: Optional[int] = None
    w: Optional[int] = None
    h: Optional[int] = None

    # Prompt mode handed to run_ocr(); defaults to plain-text OCR.
    mode: str = "free"
194
+
195
@app.post("/ocr/base64")
async def ocr_base64(req: OCRRequest):
    """OCR a base64-encoded image sent in a JSON body.

    Args:
        req: Request body. ``image_b64`` may be bare base64 or a
            ``data:<mime>;base64,<payload>`` URI. Supply all of ``x``, ``y``,
            ``w``, ``h`` to crop, or none to OCR the full image.

    Returns:
        JSON with ``text``, ``mode``, ``cropped`` and ``bbox`` (``None`` when
        the image was not cropped).

    Raises:
        HTTPException: 503 if the model is not loaded; 400 for an incomplete
            or invalid crop box or an undecodable image; 500 if inference
            fails.
    """
    if model is None:
        raise HTTPException(503, "Model not loaded yet")

    # A partially supplied box used to be ignored silently, returning text for
    # the whole image; reject it so the caller notices the mistake.
    supplied = [v is not None for v in (req.x, req.y, req.w, req.h)]
    wants_crop = all(supplied)
    if any(supplied) and not wants_crop:
        raise HTTPException(
            400, "Incomplete crop box: x, y, w and h must all be provided together"
        )

    payload = req.image_b64
    # Browsers commonly send data URIs. Without stripping the header, the
    # lenient decoder keeps its letters and corrupts the image bytes.
    if payload.startswith("data:"):
        payload = payload.partition(",")[2]

    try:
        raw = base64.b64decode(payload)
        pil_img = Image.open(io.BytesIO(raw)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"Cannot decode base64 image: {e}") from e

    cropped = False
    if wants_crop:
        try:
            pil_img = crop_image(pil_img, req.x, req.y, req.w, req.h)
            cropped = True
        except ValueError as e:
            raise HTTPException(400, str(e)) from e

    try:
        text = run_ocr(pil_img, mode=req.mode)
    except Exception as e:
        log.exception("OCR inference error")
        raise HTTPException(500, f"OCR failed: {e}") from e

    return JSONResponse({
        "text": text,
        "mode": req.mode,
        "cropped": cropped,
        "bbox": {"x": req.x, "y": req.y, "w": req.w, "h": req.h} if cropped else None,
    })
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.111.0
2
+ uvicorn[standard]>=0.29.0
3
+ python-multipart>=0.0.9
4
+ pillow>=10.0.0
5
+ torch>=2.6.0
6
+ transformers==4.46.3
7
+ tokenizers==0.20.3
8
+ einops
9
+ addict
10
+ easydict
11
+ pydantic>=2.0.0
12
+ huggingface_hub>=0.23.0
13
+ accelerate>=0.30.0