Files changed (1) hide show
  1. main.py +38 -29
main.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import FastAPI, File, UploadFile, HTTPException, Query
2
  from fastapi.middleware.cors import CORSMiddleware
3
- from typing import List, Dict, Any
4
  from io import BytesIO
5
  from PIL import Image
6
  import uvicorn
@@ -29,6 +29,7 @@ app.add_middleware(
29
  allow_headers=["*"],
30
  )
31
 
 
32
  @app.on_event("startup")
33
  async def startup_event():
34
  print("Server started. OCR models will be loaded lazily on first request.")
@@ -54,52 +55,61 @@ def get_models():
54
 
55
 
56
  def process_image(img: np.ndarray, detector, recognizer, min_conf: float) -> List[Dict]:
57
- """Process single image and return OCR results."""
58
  h_img, w_img = img.shape[:2]
59
-
60
- # Step 1: Detect text regions
61
  results = detector.predict(img)
62
-
63
  all_rois = []
64
  all_bboxes = []
65
-
66
  for result in results:
67
  boxes = result.get("dt_polys", [])
68
  for box in boxes:
69
  pts = np.array(box, dtype=np.int32)
70
  x, y, w, h = cv2.boundingRect(pts)
 
71
  x1 = max(x, 0)
72
  y1 = max(y, 0)
73
  x2 = min(x + w, w_img)
74
  y2 = min(y + h, h_img)
75
-
76
  if x2 > x1 and y2 > y1:
77
  roi = img[y1:y2, x1:x2]
78
  if roi.size > 0:
79
  all_rois.append(roi)
80
  all_bboxes.append([int(x1), int(y1), int(x2), int(y2)])
81
-
82
- # Step 2: Recognize text in each ROI
83
  ocr_results = []
84
-
85
  for i, roi in enumerate(all_rois):
86
  try:
87
- rec_generator = recognizer.predict(roi)
88
- rec = next(rec_generator)
89
  text = rec.get("rec_text", "")
90
  score = float(rec.get("rec_score", 0.0))
91
  except:
92
  text = ""
93
  score = 0.0
94
-
95
- if score >= min_conf:
96
  ocr_results.append({
97
  "box_id": i + 1,
98
  "text": text,
99
  "confidence": round(score, 4),
100
  "bbox": all_bboxes[i]
101
  })
102
-
 
 
 
 
 
 
 
 
 
103
  return ocr_results
104
 
105
 
@@ -118,18 +128,18 @@ async def ocr_image(
118
  file: UploadFile = File(...),
119
  min_conf: float = Query(default=0.0, ge=0.0, le=1.0),
120
  ):
121
- """OCR for images (JPG, PNG, etc.)"""
122
  try:
123
  contents = await file.read()
124
  pil_img = Image.open(BytesIO(contents)).convert("RGB")
125
  img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
126
  except:
127
  raise HTTPException(status_code=400, detail="Invalid image file")
128
-
129
  detector, recognizer = get_models()
130
  ocr_results = process_image(img, detector, recognizer, min_conf)
131
- full_text = "\n".join([r["text"] for r in ocr_results if r["text"]])
132
-
 
133
  return {
134
  "items": ocr_results,
135
  "text": full_text,
@@ -143,34 +153,33 @@ async def ocr_pdf(
143
  dpi: int = Query(default=300, ge=72, le=600),
144
  min_conf: float = Query(default=0.0, ge=0.0, le=1.0),
145
  ):
146
- """OCR for PDF files - converts each page to image then extracts text."""
147
  if not PDF_AVAILABLE:
148
  raise HTTPException(status_code=500, detail="PDF support not available")
149
-
150
  try:
151
  contents = await file.read()
152
  pages = convert_from_bytes(contents, dpi=dpi)
153
  except Exception as e:
154
  raise HTTPException(status_code=400, detail=f"Invalid PDF file: {e}")
155
-
156
  detector, recognizer = get_models()
157
-
158
  all_results = []
159
  all_text = []
160
-
161
  for page_num, pil_img in enumerate(pages, start=1):
162
  img = cv2.cvtColor(np.array(pil_img.convert("RGB")), cv2.COLOR_RGB2BGR)
163
  page_results = process_image(img, detector, recognizer, min_conf)
164
-
165
- # Add page number to each result
166
  for item in page_results:
167
  item["page"] = page_num
168
-
169
  all_results.extend(page_results)
170
- page_text = "\n".join([r["text"] for r in page_results if r["text"]])
 
171
  if page_text:
172
  all_text.append(f"--- Page {page_num} ---\n{page_text}")
173
-
174
  return {
175
  "pages": len(pages),
176
  "items": all_results,
 
1
  from fastapi import FastAPI, File, UploadFile, HTTPException, Query
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from typing import List, Dict
4
  from io import BytesIO
5
  from PIL import Image
6
  import uvicorn
 
29
  allow_headers=["*"],
30
  )
31
 
32
+
33
# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in
# favor of lifespan handlers — confirm the pinned FastAPI version before
# migrating, since that change touches the FastAPI(...) construction too.
@app.on_event("startup")
async def startup_event():
    """Log readiness at startup; OCR models are loaded lazily on first request."""
    print("Server started. OCR models will be loaded lazily on first request.")
 
55
 
56
 
57
def process_image(img: np.ndarray, detector, recognizer, min_conf: float) -> List[Dict]:
    """Run two-stage OCR (detection, then recognition) on a single BGR image.

    Args:
        img: BGR image array of shape (H, W, C), as produced by cv2.
        detector: text-detection model; ``detector.predict(img)`` yields
            dicts containing a ``"dt_polys"`` list of text polygons.
        recognizer: text-recognition model; ``recognizer.predict(roi)``
            returns an iterator whose first item is a dict with
            ``"rec_text"`` and ``"rec_score"`` keys.
        min_conf: minimum recognition confidence required to keep a result.

    Returns:
        A list of dicts with keys ``box_id``, ``text``, ``confidence`` and
        ``bbox`` ([x1, y1, x2, y2]), sorted top-to-bottom and then
        right-to-left (Arabic reading order).
    """
    h_img, w_img = img.shape[:2]

    # Step 1: detect text regions and clip each bounding box to the image.
    results = detector.predict(img)

    all_rois = []
    all_bboxes = []

    for result in results:
        boxes = result.get("dt_polys", [])
        for box in boxes:
            pts = np.array(box, dtype=np.int32)
            x, y, w, h = cv2.boundingRect(pts)

            x1 = max(x, 0)
            y1 = max(y, 0)
            x2 = min(x + w, w_img)
            y2 = min(y + h, h_img)

            if x2 > x1 and y2 > y1:
                roi = img[y1:y2, x1:x2]
                if roi.size > 0:
                    all_rois.append(roi)
                    all_bboxes.append([int(x1), int(y1), int(x2), int(y2)])

    # Step 2: recognize text in each cropped region.
    ocr_results = []

    for i, roi in enumerate(all_rois):
        try:
            rec_gen = recognizer.predict(roi)
            rec = next(rec_gen)
            text = rec.get("rec_text", "")
            score = float(rec.get("rec_score", 0.0))
        except Exception:
            # A failed recognition (or an empty result iterator) degrades to
            # an empty, zero-confidence entry instead of aborting the page.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
            text = ""
            score = 0.0

        # Drop low-confidence and whitespace-only results.
        if score >= min_conf and text.strip():
            ocr_results.append({
                "box_id": i + 1,
                "text": text,
                "confidence": round(score, 4),
                "bbox": all_bboxes[i],
            })

    # Arabic reading order: top-to-bottom, then right-to-left within a row.
    # NOTE(review): sorting on the raw top-y means two boxes on the same
    # visual line whose tops differ by a pixel can interleave; bucketing y
    # into rows (e.g. by median box height) would be more robust — confirm
    # against real documents before changing.
    ocr_results.sort(key=lambda r: (r["bbox"][1], -r["bbox"][0]))

    return ocr_results
114
 
115
 
 
128
  file: UploadFile = File(...),
129
  min_conf: float = Query(default=0.0, ge=0.0, le=1.0),
130
  ):
 
131
  try:
132
  contents = await file.read()
133
  pil_img = Image.open(BytesIO(contents)).convert("RGB")
134
  img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
135
  except:
136
  raise HTTPException(status_code=400, detail="Invalid image file")
137
+
138
  detector, recognizer = get_models()
139
  ocr_results = process_image(img, detector, recognizer, min_conf)
140
+
141
+ full_text = "\n".join([r["text"] for r in ocr_results])
142
+
143
  return {
144
  "items": ocr_results,
145
  "text": full_text,
 
153
  dpi: int = Query(default=300, ge=72, le=600),
154
  min_conf: float = Query(default=0.0, ge=0.0, le=1.0),
155
  ):
 
156
  if not PDF_AVAILABLE:
157
  raise HTTPException(status_code=500, detail="PDF support not available")
158
+
159
  try:
160
  contents = await file.read()
161
  pages = convert_from_bytes(contents, dpi=dpi)
162
  except Exception as e:
163
  raise HTTPException(status_code=400, detail=f"Invalid PDF file: {e}")
164
+
165
  detector, recognizer = get_models()
166
+
167
  all_results = []
168
  all_text = []
169
+
170
  for page_num, pil_img in enumerate(pages, start=1):
171
  img = cv2.cvtColor(np.array(pil_img.convert("RGB")), cv2.COLOR_RGB2BGR)
172
  page_results = process_image(img, detector, recognizer, min_conf)
173
+
 
174
  for item in page_results:
175
  item["page"] = page_num
176
+
177
  all_results.extend(page_results)
178
+
179
+ page_text = "\n".join([r["text"] for r in page_results])
180
  if page_text:
181
  all_text.append(f"--- Page {page_num} ---\n{page_text}")
182
+
183
  return {
184
  "pages": len(pages),
185
  "items": all_results,