Zienab committed on
Commit
5682ea3
·
verified ·
1 Parent(s): 2499e33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -35
app.py CHANGED
@@ -6,9 +6,11 @@ from PIL import Image, ImageDraw
6
  import io
7
  import base64
8
  import torch
9
- from transformers import AutoModel
10
  import numpy as np
11
  import logging
 
 
12
 
13
  # إعداد التسجيل
14
  logging.basicConfig(level=logging.INFO)
@@ -28,13 +30,14 @@ app.add_middleware(
28
 
29
  # تحميل النموذج مرة واحدة عند بدء التشغيل
30
  model = None
 
31
 
32
  class BoxRegion(BaseModel):
33
  id: int
34
- x1: float
35
- y1: float
36
- x2: float
37
- y2: float
38
 
39
  class OCRRequest(BaseModel):
40
  image: str = Field(..., description="Base64 encoded image")
@@ -57,16 +60,30 @@ class OCRResponse(BaseModel):
57
  @app.on_event("startup")
58
  async def load_model():
59
  """تحميل النموذج عند بدء التشغيل"""
60
- global model
61
  try:
62
  logger.info("Loading DeepSeek OCR model...")
 
 
63
  model = AutoModel.from_pretrained(
64
  "deepseek-ai/DeepSeek-OCR-2",
65
  trust_remote_code=True,
66
- dtype="auto",
67
- device_map="cpu"
 
68
  )
69
  model.eval()
 
 
 
 
 
 
 
 
 
 
 
70
  logger.info("Model loaded successfully!")
71
  except Exception as e:
72
  logger.error(f"Error loading model: {str(e)}")
@@ -87,27 +104,42 @@ def decode_base64_image(base64_string: str) -> Image.Image:
87
  def crop_and_ocr(image: Image.Image, box: BoxRegion) -> str:
88
  """قص المنطقة المحددة وإجراء OCR عليها"""
89
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  # قص المنطقة
91
- cropped = image.crop((
92
- int(box.x1 * image.width),
93
- int(box.y1 * image.height),
94
- int(box.x2 * image.width),
95
- int(box.y2 * image.height)
96
- ))
97
 
98
  # إجراء OCR
99
  with torch.no_grad():
100
- result = model(cropped)
101
- text = result.strip() if result else ""
 
 
 
 
 
 
 
102
 
103
- return text
104
  except Exception as e:
105
  logger.error(f"Error processing box {box.id}: {str(e)}")
106
  return ""
107
 
108
  def cleanup_memory():
109
  """تنظيف الذاكرة"""
110
- import gc
111
  gc.collect()
112
  if torch.cuda.is_available():
113
  torch.cuda.empty_cache()
@@ -117,17 +149,20 @@ async def root():
117
  return {
118
  "message": "DeepSeek OCR API",
119
  "status": "active",
120
- "model": "deepseek-ai/DeepSeek-OCR-2"
 
121
  }
122
 
123
  @app.get("/health")
124
  async def health_check():
125
- return {"status": "healthy", "model_loaded": model is not None}
 
 
 
126
 
127
  @app.post("/ocr", response_model=OCRResponse)
128
  async def process_ocr(request: OCRRequest):
129
  """معالجة OCR للمناطق المحددة في الصورة"""
130
- import time
131
  start_time = time.time()
132
 
133
  if model is None:
@@ -155,7 +190,12 @@ async def process_ocr(request: OCRRequest):
155
  full_image_text = None
156
  if request.include_full_image:
157
  with torch.no_grad():
158
- full_image_text = model(image).strip()
 
 
 
 
 
159
 
160
  # حساب وقت المعالجة
161
  processing_time = time.time() - start_time
@@ -166,33 +206,36 @@ async def process_ocr(request: OCRRequest):
166
  return OCRResponse(
167
  results=results,
168
  full_image_text=full_image_text,
169
- processing_time=processing_time
170
  )
171
 
172
  except Exception as e:
173
  cleanup_memory()
 
174
  raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
175
 
176
- @app.post("/ocr/single", response_model=BoxResult)
177
- async def process_single_box(image: str, box: BoxRegion):
178
  """معالجة مربع واحد فقط"""
179
  if model is None:
180
  raise HTTPException(status_code=503, detail="Model not loaded yet")
181
 
182
  try:
183
- img = decode_base64_image(image)
184
- text = crop_and_ocr(img, box)
 
 
185
 
186
  cleanup_memory()
187
 
188
- return BoxResult(
189
- id=box.id,
190
- text=text,
191
- x1=box.x1,
192
- y1=box.y1,
193
- x2=box.x2,
194
- y2=box.y2
195
- )
196
  except Exception as e:
197
  cleanup_memory()
198
  raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
 
6
  import io
7
  import base64
8
  import torch
9
+ from transformers import AutoModel, AutoProcessor
10
  import numpy as np
11
  import logging
12
+ import time
13
+ import gc
14
 
15
  # إعداد التسجيل
16
  logging.basicConfig(level=logging.INFO)
 
30
 
31
  # تحميل النموذج مرة واحدة عند بدء التشغيل
32
  model = None
33
+ processor = None
34
 
35
  class BoxRegion(BaseModel):
36
  id: int
37
+ x1: float = Field(..., ge=0, le=1)
38
+ y1: float = Field(..., ge=0, le=1)
39
+ x2: float = Field(..., ge=0, le=1)
40
+ y2: float = Field(..., ge=0, le=1)
41
 
42
  class OCRRequest(BaseModel):
43
  image: str = Field(..., description="Base64 encoded image")
 
60
  @app.on_event("startup")
61
  async def load_model():
62
  """تحميل النموذج عند بدء التشغيل"""
63
+ global model, processor
64
  try:
65
  logger.info("Loading DeepSeek OCR model...")
66
+
67
+ # تحميل النموذج مع إعدادات محسنة للـ CPU
68
  model = AutoModel.from_pretrained(
69
  "deepseek-ai/DeepSeek-OCR-2",
70
  trust_remote_code=True,
71
+ torch_dtype=torch.float32,
72
+ device_map="cpu",
73
+ low_cpu_mem_usage=True
74
  )
75
  model.eval()
76
+
77
+ # محاولة تحميل المعالج إذا كان متاحاً
78
+ try:
79
+ processor = AutoProcessor.from_pretrained(
80
+ "deepseek-ai/DeepSeek-OCR-2",
81
+ trust_remote_code=True
82
+ )
83
+ except:
84
+ processor = None
85
+ logger.warning("Processor not available, using model directly")
86
+
87
  logger.info("Model loaded successfully!")
88
  except Exception as e:
89
  logger.error(f"Error loading model: {str(e)}")
 
104
  def crop_and_ocr(image: Image.Image, box: BoxRegion) -> str:
105
  """قص المنطقة المحددة وإجراء OCR عليها"""
106
  try:
107
+ # حساب الإحداثيات الفعلية
108
+ img_width, img_height = image.size
109
+
110
+ left = int(box.x1 * img_width)
111
+ top = int(box.y1 * img_height)
112
+ right = int(box.x2 * img_width)
113
+ bottom = int(box.y2 * img_height)
114
+
115
+ # التأكد من أن الإحداثيات صحيحة
116
+ left = max(0, min(left, img_width))
117
+ top = max(0, min(top, img_height))
118
+ right = max(left + 1, min(right, img_width))
119
+ bottom = max(top + 1, min(bottom, img_height))
120
+
121
  # قص المنطقة
122
+ cropped = image.crop((left, top, right, bottom))
 
 
 
 
 
123
 
124
  # إجراء OCR
125
  with torch.no_grad():
126
+ if processor is not None:
127
+ # استخدام processor إذا كان متاحاً
128
+ inputs = processor(images=cropped, return_tensors="pt")
129
+ result = model.generate(**inputs)
130
+ text = processor.decode(result[0], skip_special_tokens=True)
131
+ else:
132
+ # استخدام النموذج مباشرة
133
+ result = model(cropped)
134
+ text = result.strip() if result else ""
135
 
136
+ return text if text else ""
137
  except Exception as e:
138
  logger.error(f"Error processing box {box.id}: {str(e)}")
139
  return ""
140
 
141
  def cleanup_memory():
142
  """تنظيف الذاكرة"""
 
143
  gc.collect()
144
  if torch.cuda.is_available():
145
  torch.cuda.empty_cache()
 
149
  return {
150
  "message": "DeepSeek OCR API",
151
  "status": "active",
152
+ "model": "deepseek-ai/DeepSeek-OCR-2",
153
+ "model_loaded": model is not None
154
  }
155
 
156
  @app.get("/health")
157
  async def health_check():
158
+ return {
159
+ "status": "healthy",
160
+ "model_loaded": model is not None
161
+ }
162
 
163
  @app.post("/ocr", response_model=OCRResponse)
164
  async def process_ocr(request: OCRRequest):
165
  """معالجة OCR للمناطق المحددة في الصورة"""
 
166
  start_time = time.time()
167
 
168
  if model is None:
 
190
  full_image_text = None
191
  if request.include_full_image:
192
  with torch.no_grad():
193
+ if processor is not None:
194
+ inputs = processor(images=image, return_tensors="pt")
195
+ result = model.generate(**inputs)
196
+ full_image_text = processor.decode(result[0], skip_special_tokens=True)
197
+ else:
198
+ full_image_text = model(image).strip()
199
 
200
  # حساب وقت المعالجة
201
  processing_time = time.time() - start_time
 
206
  return OCRResponse(
207
  results=results,
208
  full_image_text=full_image_text,
209
+ processing_time=round(processing_time, 2)
210
  )
211
 
212
  except Exception as e:
213
  cleanup_memory()
214
+ logger.error(f"Processing error: {str(e)}")
215
  raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
216
 
217
+ @app.post("/ocr/single")
218
+ async def process_single_box(request: dict):
219
  """معالجة مربع واحد فقط"""
220
  if model is None:
221
  raise HTTPException(status_code=503, detail="Model not loaded yet")
222
 
223
  try:
224
+ image = decode_base64_image(request["image"])
225
+ box = BoxRegion(**request["box"])
226
+
227
+ text = crop_and_ocr(image, box)
228
 
229
  cleanup_memory()
230
 
231
+ return {
232
+ "id": box.id,
233
+ "text": text,
234
+ "x1": box.x1,
235
+ "y1": box.y1,
236
+ "x2": box.x2,
237
+ "y2": box.y2
238
+ }
239
  except Exception as e:
240
  cleanup_memory()
241
  raise HTTPException(status_code=500, detail=f"Error: {str(e)}")