Update app.py
Browse files
app.py
CHANGED
|
@@ -6,9 +6,11 @@ from PIL import Image, ImageDraw
|
|
| 6 |
import io
|
| 7 |
import base64
|
| 8 |
import torch
|
| 9 |
-
from transformers import AutoModel
|
| 10 |
import numpy as np
|
| 11 |
import logging
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# إعداد التسجيل
|
| 14 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -28,13 +30,14 @@ app.add_middleware(
|
|
| 28 |
|
| 29 |
# تحميل النموذج مرة واحدة عند بدء التشغيل
|
| 30 |
model = None
|
|
|
|
| 31 |
|
| 32 |
class BoxRegion(BaseModel):
|
| 33 |
id: int
|
| 34 |
-
x1: float
|
| 35 |
-
y1: float
|
| 36 |
-
x2: float
|
| 37 |
-
y2: float
|
| 38 |
|
| 39 |
class OCRRequest(BaseModel):
|
| 40 |
image: str = Field(..., description="Base64 encoded image")
|
|
@@ -57,16 +60,30 @@ class OCRResponse(BaseModel):
|
|
| 57 |
@app.on_event("startup")
|
| 58 |
async def load_model():
|
| 59 |
"""تحميل النموذج عند بدء التشغيل"""
|
| 60 |
-
global model
|
| 61 |
try:
|
| 62 |
logger.info("Loading DeepSeek OCR model...")
|
|
|
|
|
|
|
| 63 |
model = AutoModel.from_pretrained(
|
| 64 |
"deepseek-ai/DeepSeek-OCR-2",
|
| 65 |
trust_remote_code=True,
|
| 66 |
-
|
| 67 |
-
device_map="cpu"
|
|
|
|
| 68 |
)
|
| 69 |
model.eval()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
logger.info("Model loaded successfully!")
|
| 71 |
except Exception as e:
|
| 72 |
logger.error(f"Error loading model: {str(e)}")
|
|
@@ -87,27 +104,42 @@ def decode_base64_image(base64_string: str) -> Image.Image:
|
|
| 87 |
def crop_and_ocr(image: Image.Image, box: BoxRegion) -> str:
|
| 88 |
"""قص المنطقة المحددة وإجراء OCR عليها"""
|
| 89 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# قص المنطقة
|
| 91 |
-
cropped = image.crop((
|
| 92 |
-
int(box.x1 * image.width),
|
| 93 |
-
int(box.y1 * image.height),
|
| 94 |
-
int(box.x2 * image.width),
|
| 95 |
-
int(box.y2 * image.height)
|
| 96 |
-
))
|
| 97 |
|
| 98 |
# إجراء OCR
|
| 99 |
with torch.no_grad():
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
return text
|
| 104 |
except Exception as e:
|
| 105 |
logger.error(f"Error processing box {box.id}: {str(e)}")
|
| 106 |
return ""
|
| 107 |
|
| 108 |
def cleanup_memory():
|
| 109 |
"""تنظيف الذاكرة"""
|
| 110 |
-
import gc
|
| 111 |
gc.collect()
|
| 112 |
if torch.cuda.is_available():
|
| 113 |
torch.cuda.empty_cache()
|
|
@@ -117,17 +149,20 @@ async def root():
|
|
| 117 |
return {
|
| 118 |
"message": "DeepSeek OCR API",
|
| 119 |
"status": "active",
|
| 120 |
-
"model": "deepseek-ai/DeepSeek-OCR-2"
|
|
|
|
| 121 |
}
|
| 122 |
|
| 123 |
@app.get("/health")
|
| 124 |
async def health_check():
|
| 125 |
-
return {
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
@app.post("/ocr", response_model=OCRResponse)
|
| 128 |
async def process_ocr(request: OCRRequest):
|
| 129 |
"""معالجة OCR للمناطق المحددة في الصورة"""
|
| 130 |
-
import time
|
| 131 |
start_time = time.time()
|
| 132 |
|
| 133 |
if model is None:
|
|
@@ -155,7 +190,12 @@ async def process_ocr(request: OCRRequest):
|
|
| 155 |
full_image_text = None
|
| 156 |
if request.include_full_image:
|
| 157 |
with torch.no_grad():
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
# حساب وقت المعالجة
|
| 161 |
processing_time = time.time() - start_time
|
|
@@ -166,33 +206,36 @@ async def process_ocr(request: OCRRequest):
|
|
| 166 |
return OCRResponse(
|
| 167 |
results=results,
|
| 168 |
full_image_text=full_image_text,
|
| 169 |
-
processing_time=processing_time
|
| 170 |
)
|
| 171 |
|
| 172 |
except Exception as e:
|
| 173 |
cleanup_memory()
|
|
|
|
| 174 |
raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
|
| 175 |
|
| 176 |
-
@app.post("/ocr/single"
|
| 177 |
-
async def process_single_box(
|
| 178 |
"""معالجة مربع واحد فقط"""
|
| 179 |
if model is None:
|
| 180 |
raise HTTPException(status_code=503, detail="Model not loaded yet")
|
| 181 |
|
| 182 |
try:
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
| 185 |
|
| 186 |
cleanup_memory()
|
| 187 |
|
| 188 |
-
return
|
| 189 |
-
id
|
| 190 |
-
text
|
| 191 |
-
x1
|
| 192 |
-
y1
|
| 193 |
-
x2
|
| 194 |
-
y2
|
| 195 |
-
|
| 196 |
except Exception as e:
|
| 197 |
cleanup_memory()
|
| 198 |
raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
|
|
|
|
| 6 |
import io
|
| 7 |
import base64
|
| 8 |
import torch
|
| 9 |
+
from transformers import AutoModel, AutoProcessor
|
| 10 |
import numpy as np
|
| 11 |
import logging
|
| 12 |
+
import time
|
| 13 |
+
import gc
|
| 14 |
|
| 15 |
# إعداد التسجيل
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 30 |
|
| 31 |
# تحميل النموذج مرة واحدة عند بدء التشغيل
|
| 32 |
model = None
|
| 33 |
+
processor = None
|
| 34 |
|
| 35 |
class BoxRegion(BaseModel):
    # One rectangular region of an image, described by two corner points.
    # Every coordinate is validated to lie in [0, 1] (ge=0, le=1):
    # crop_and_ocr multiplies x values by the image width and y values by
    # the image height, so they are fractions of the image size, not pixels.
    id: int  # caller-supplied identifier; used in log messages and echoed by /ocr/single
    x1: float = Field(..., ge=0, le=1)  # left edge, as a fraction of image width
    y1: float = Field(..., ge=0, le=1)  # top edge, as a fraction of image height
    x2: float = Field(..., ge=0, le=1)  # right edge, as a fraction of image width
    y2: float = Field(..., ge=0, le=1)  # bottom edge, as a fraction of image height
|
| 41 |
|
| 42 |
class OCRRequest(BaseModel):
|
| 43 |
image: str = Field(..., description="Base64 encoded image")
|
|
|
|
| 60 |
@app.on_event("startup")
|
| 61 |
async def load_model():
|
| 62 |
"""تحميل النموذج عند بدء التشغيل"""
|
| 63 |
+
global model, processor
|
| 64 |
try:
|
| 65 |
logger.info("Loading DeepSeek OCR model...")
|
| 66 |
+
|
| 67 |
+
# تحميل النموذج مع إعدادات محسنة للـ CPU
|
| 68 |
model = AutoModel.from_pretrained(
|
| 69 |
"deepseek-ai/DeepSeek-OCR-2",
|
| 70 |
trust_remote_code=True,
|
| 71 |
+
torch_dtype=torch.float32,
|
| 72 |
+
device_map="cpu",
|
| 73 |
+
low_cpu_mem_usage=True
|
| 74 |
)
|
| 75 |
model.eval()
|
| 76 |
+
|
| 77 |
+
# محاولة تحميل المعالج إذا كان متاحاً
|
| 78 |
+
try:
|
| 79 |
+
processor = AutoProcessor.from_pretrained(
|
| 80 |
+
"deepseek-ai/DeepSeek-OCR-2",
|
| 81 |
+
trust_remote_code=True
|
| 82 |
+
)
|
| 83 |
+
except:
|
| 84 |
+
processor = None
|
| 85 |
+
logger.warning("Processor not available, using model directly")
|
| 86 |
+
|
| 87 |
logger.info("Model loaded successfully!")
|
| 88 |
except Exception as e:
|
| 89 |
logger.error(f"Error loading model: {str(e)}")
|
|
|
|
| 104 |
def _box_to_pixel_rect(box, img_width, img_height):
    """Map a normalized box onto a non-empty pixel rectangle inside the image.

    Args:
        box: Object exposing ``x1``, ``y1``, ``x2``, ``y2`` as fractions of
            the image size.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        A ``(left, top, right, bottom)`` tuple suitable for ``Image.crop``.
    """
    left = int(box.x1 * img_width)
    top = int(box.y1 * img_height)
    right = int(box.x2 * img_width)
    bottom = int(box.y2 * img_height)

    # Keep the top-left corner strictly inside the image. Clamping it to
    # width/height (instead of width-1/height-1) would let the minimum
    # one-pixel extent enforced below run past the image edge when
    # x1 == 1.0 or y1 == 1.0.
    left = max(0, min(left, img_width - 1))
    top = max(0, min(top, img_height - 1))

    # Guarantee at least one pixel in each direction, never beyond the image.
    right = max(left + 1, min(right, img_width))
    bottom = max(top + 1, min(bottom, img_height))

    return left, top, right, bottom


def crop_and_ocr(image: Image.Image, box: BoxRegion) -> str:
    """Crop the region described by ``box`` out of ``image`` and run OCR on it.

    Args:
        image: Source image; the box coordinates are fractions of its size.
        box: Region to read.

    Returns:
        The recognized text, or ``""`` when nothing was recognized or any
        step failed. Failures are logged and never raised to the caller.
    """
    try:
        img_width, img_height = image.size
        cropped = image.crop(_box_to_pixel_rect(box, img_width, img_height))

        # Inference only: no gradients are needed.
        with torch.no_grad():
            if processor is not None:
                # Preferred path: preprocess, generate, then decode tokens.
                inputs = processor(images=cropped, return_tensors="pt")
                result = model.generate(**inputs)
                text = processor.decode(result[0], skip_special_tokens=True)
            else:
                # NOTE(review): this fallback passes the PIL image straight to
                # the model and expects a string back — confirm against the
                # model's remote code.
                result = model(cropped)
                text = result.strip() if result else ""

        return text if text else ""
    except Exception as e:
        # Best-effort by design: one bad box must not fail the whole request.
        logger.error(f"Error processing box {box.id}: {str(e)}")
        return ""
|
| 140 |
|
| 141 |
def cleanup_memory():
    """Run a garbage-collection pass and, when CUDA is available, empty its cache."""
    gc.collect()

    cuda = torch.cuda
    if not cuda.is_available():
        return
    cuda.empty_cache()
|
|
|
|
| 149 |
return {
|
| 150 |
"message": "DeepSeek OCR API",
|
| 151 |
"status": "active",
|
| 152 |
+
"model": "deepseek-ai/DeepSeek-OCR-2",
|
| 153 |
+
"model_loaded": model is not None
|
| 154 |
}
|
| 155 |
|
| 156 |
@app.get("/health")
async def health_check():
    """Report that the service is up and whether the OCR model is loaded."""
    is_loaded = model is not None
    return {"status": "healthy", "model_loaded": is_loaded}
|
| 162 |
|
| 163 |
@app.post("/ocr", response_model=OCRResponse)
|
| 164 |
async def process_ocr(request: OCRRequest):
|
| 165 |
"""معالجة OCR للمناطق المحددة في الصورة"""
|
|
|
|
| 166 |
start_time = time.time()
|
| 167 |
|
| 168 |
if model is None:
|
|
|
|
| 190 |
full_image_text = None
|
| 191 |
if request.include_full_image:
|
| 192 |
with torch.no_grad():
|
| 193 |
+
if processor is not None:
|
| 194 |
+
inputs = processor(images=image, return_tensors="pt")
|
| 195 |
+
result = model.generate(**inputs)
|
| 196 |
+
full_image_text = processor.decode(result[0], skip_special_tokens=True)
|
| 197 |
+
else:
|
| 198 |
+
full_image_text = model(image).strip()
|
| 199 |
|
| 200 |
# حساب وقت المعالجة
|
| 201 |
processing_time = time.time() - start_time
|
|
|
|
| 206 |
return OCRResponse(
|
| 207 |
results=results,
|
| 208 |
full_image_text=full_image_text,
|
| 209 |
+
processing_time=round(processing_time, 2)
|
| 210 |
)
|
| 211 |
|
| 212 |
except Exception as e:
|
| 213 |
cleanup_memory()
|
| 214 |
+
logger.error(f"Processing error: {str(e)}")
|
| 215 |
raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
|
| 216 |
|
| 217 |
+
@app.post("/ocr/single")
async def process_single_box(request: dict):
    """Run OCR on a single box of a base64-encoded image.

    Args:
        request: JSON body with two keys: ``"image"`` (base64-encoded image)
            and ``"box"`` (an object with the ``BoxRegion`` fields).

    Returns:
        A dict with the box ``id``, the recognized ``text`` and the box
        coordinates ``x1``, ``y1``, ``x2``, ``y2``.

    Raises:
        HTTPException: 503 when the model is not loaded, 422 when the payload
            is malformed, 500 for any other processing failure.
    """
    # Local import: only this handler needs it.
    from pydantic import ValidationError

    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    # The body is an untyped dict, so validate it by hand. A missing key or
    # bad box is a client error and must not surface as a 500.
    missing = [key for key in ("image", "box") if key not in request]
    if missing:
        raise HTTPException(
            status_code=422,
            detail=f"Missing required field(s): {', '.join(missing)}",
        )
    if not isinstance(request["box"], dict):
        raise HTTPException(status_code=422, detail="'box' must be an object")

    try:
        box = BoxRegion(**request["box"])
    except ValidationError as e:
        raise HTTPException(status_code=422, detail=f"Invalid box: {str(e)}") from e

    try:
        image = decode_base64_image(request["image"])
        text = crop_and_ocr(image, box)

        return {
            "id": box.id,
            "text": text,
            "x1": box.x1,
            "y1": box.y1,
            "x2": box.x2,
            "y2": box.y2,
        }
    except HTTPException:
        # Keep the status code of any HTTP error raised by a helper instead
        # of rewrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
    finally:
        # Release memory on every exit path, success or failure.
        cleanup_memory()
|