Files changed (1) hide show
  1. main.py +38 -29
main.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import FastAPI, File, UploadFile, HTTPException, Query
2
  from fastapi.middleware.cors import CORSMiddleware
3
- from typing import List, Dict, Any
4
  from io import BytesIO
5
  from PIL import Image
6
  import uvicorn
@@ -29,6 +29,7 @@ app.add_middleware(
29
  allow_headers=["*"],
30
  )
31
 
 
32
  @app.on_event("startup")
33
  async def startup_event():
34
  print("Server started. OCR models will be loaded lazily on first request.")
@@ -54,52 +55,61 @@ def get_models():
54
 
55
 
56
  def process_image(img: np.ndarray, detector, recognizer, min_conf: float) -> List[Dict]:
57
- """Process single image and return OCR results."""
58
  h_img, w_img = img.shape[:2]
59
-
60
- # Step 1: Detect text regions
61
  results = detector.predict(img)
62
-
63
  all_rois = []
64
  all_bboxes = []
65
-
66
  for result in results:
67
  boxes = result.get("dt_polys", [])
68
  for box in boxes:
69
  pts = np.array(box, dtype=np.int32)
70
  x, y, w, h = cv2.boundingRect(pts)
 
71
  x1 = max(x, 0)
72
  y1 = max(y, 0)
73
  x2 = min(x + w, w_img)
74
  y2 = min(y + h, h_img)
75
-
76
  if x2 > x1 and y2 > y1:
77
  roi = img[y1:y2, x1:x2]
78
  if roi.size > 0:
79
  all_rois.append(roi)
80
  all_bboxes.append([int(x1), int(y1), int(x2), int(y2)])
81
-
82
- # Step 2: Recognize text in each ROI
83
  ocr_results = []
84
-
85
  for i, roi in enumerate(all_rois):
86
  try:
87
- rec_generator = recognizer.predict(roi)
88
- rec = next(rec_generator)
89
  text = rec.get("rec_text", "")
90
  score = float(rec.get("rec_score", 0.0))
91
  except:
92
  text = ""
93
  score = 0.0
94
-
95
- if score >= min_conf:
96
  ocr_results.append({
97
  "box_id": i + 1,
98
  "text": text,
99
  "confidence": round(score, 4),
100
  "bbox": all_bboxes[i]
101
  })
102
-
 
 
 
 
 
 
 
 
 
103
  return ocr_results
104
 
105
 
@@ -118,18 +128,18 @@ async def ocr_image(
118
  file: UploadFile = File(...),
119
  min_conf: float = Query(default=0.0, ge=0.0, le=1.0),
120
  ):
121
- """OCR for images (JPG, PNG, etc.)"""
122
  try:
123
  contents = await file.read()
124
  pil_img = Image.open(BytesIO(contents)).convert("RGB")
125
  img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
126
  except:
127
  raise HTTPException(status_code=400, detail="Invalid image file")
128
-
129
  detector, recognizer = get_models()
130
  ocr_results = process_image(img, detector, recognizer, min_conf)
131
- full_text = "\n".join([r["text"] for r in ocr_results if r["text"]])
132
-
 
133
  return {
134
  "items": ocr_results,
135
  "text": full_text,
@@ -143,34 +153,33 @@ async def ocr_pdf(
143
  dpi: int = Query(default=300, ge=72, le=600),
144
  min_conf: float = Query(default=0.0, ge=0.0, le=1.0),
145
  ):
146
- """OCR for PDF files - converts each page to image then extracts text."""
147
  if not PDF_AVAILABLE:
148
  raise HTTPException(status_code=500, detail="PDF support not available")
149
-
150
  try:
151
  contents = await file.read()
152
  pages = convert_from_bytes(contents, dpi=dpi)
153
  except Exception as e:
154
  raise HTTPException(status_code=400, detail=f"Invalid PDF file: {e}")
155
-
156
  detector, recognizer = get_models()
157
-
158
  all_results = []
159
  all_text = []
160
-
161
  for page_num, pil_img in enumerate(pages, start=1):
162
  img = cv2.cvtColor(np.array(pil_img.convert("RGB")), cv2.COLOR_RGB2BGR)
163
  page_results = process_image(img, detector, recognizer, min_conf)
164
-
165
- # Add page number to each result
166
  for item in page_results:
167
  item["page"] = page_num
168
-
169
  all_results.extend(page_results)
170
- page_text = "\n".join([r["text"] for r in page_results if r["text"]])
 
171
  if page_text:
172
  all_text.append(f"--- Page {page_num} ---\n{page_text}")
173
-
174
  return {
175
  "pages": len(pages),
176
  "items": all_results,
 
1
  from fastapi import FastAPI, File, UploadFile, HTTPException, Query
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from typing import List, Dict
4
  from io import BytesIO
5
  from PIL import Image
6
  import uvicorn
 
29
  allow_headers=["*"],
30
  )
31
 
32
+
33
# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in
# favor of lifespan handlers — confirm the pinned FastAPI version before
# migrating, since that change touches the FastAPI(...) construction too.
@app.on_event("startup")
async def startup_event():
    """Log readiness at startup; OCR models are loaded lazily on first request."""
    print("Server started. OCR models will be loaded lazily on first request.")
 
55
 
56
 
57
def process_image(img: np.ndarray, detector, recognizer, min_conf: float) -> List[Dict]:
    """Run two-stage OCR (detection, then recognition) on a single BGR image.

    Args:
        img: BGR image array of shape (H, W, C), as produced by cv2.
        detector: text-detection model; ``detector.predict(img)`` yields
            dicts containing a ``"dt_polys"`` list of text polygons.
        recognizer: text-recognition model; ``recognizer.predict(roi)``
            returns an iterator whose first item is a dict with
            ``"rec_text"`` and ``"rec_score"`` keys.
        min_conf: minimum recognition confidence required to keep a result.

    Returns:
        A list of dicts with keys ``box_id``, ``text``, ``confidence`` and
        ``bbox`` ([x1, y1, x2, y2]), sorted top-to-bottom and then
        right-to-left (Arabic reading order).
    """
    h_img, w_img = img.shape[:2]

    # Step 1: detect text regions and clip each bounding box to the image.
    results = detector.predict(img)

    all_rois = []
    all_bboxes = []

    for result in results:
        boxes = result.get("dt_polys", [])
        for box in boxes:
            pts = np.array(box, dtype=np.int32)
            x, y, w, h = cv2.boundingRect(pts)

            x1 = max(x, 0)
            y1 = max(y, 0)
            x2 = min(x + w, w_img)
            y2 = min(y + h, h_img)

            if x2 > x1 and y2 > y1:
                roi = img[y1:y2, x1:x2]
                if roi.size > 0:
                    all_rois.append(roi)
                    all_bboxes.append([int(x1), int(y1), int(x2), int(y2)])

    # Step 2: recognize text in each cropped region.
    ocr_results = []

    for i, roi in enumerate(all_rois):
        try:
            rec_gen = recognizer.predict(roi)
            rec = next(rec_gen)
            text = rec.get("rec_text", "")
            score = float(rec.get("rec_score", 0.0))
        except Exception:
            # A failed recognition (or an empty result iterator) degrades to
            # an empty, zero-confidence entry instead of aborting the page.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
            text = ""
            score = 0.0

        # Drop low-confidence and whitespace-only results.
        if score >= min_conf and text.strip():
            ocr_results.append({
                "box_id": i + 1,
                "text": text,
                "confidence": round(score, 4),
                "bbox": all_bboxes[i],
            })

    # Arabic reading order: top-to-bottom, then right-to-left within a row.
    # NOTE(review): sorting on the raw top-y means two boxes on the same
    # visual line whose tops differ by a pixel can interleave; bucketing y
    # into rows (e.g. by median box height) would be more robust — confirm
    # against real documents before changing.
    ocr_results.sort(key=lambda r: (r["bbox"][1], -r["bbox"][0]))

    return ocr_results
114
 
115
 
 
128
  file: UploadFile = File(...),
129
  min_conf: float = Query(default=0.0, ge=0.0, le=1.0),
130
  ):
 
131
  try:
132
  contents = await file.read()
133
  pil_img = Image.open(BytesIO(contents)).convert("RGB")
134
  img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
135
  except:
136
  raise HTTPException(status_code=400, detail="Invalid image file")
137
+
138
  detector, recognizer = get_models()
139
  ocr_results = process_image(img, detector, recognizer, min_conf)
140
+
141
+ full_text = "\n".join([r["text"] for r in ocr_results])
142
+
143
  return {
144
  "items": ocr_results,
145
  "text": full_text,
 
153
  dpi: int = Query(default=300, ge=72, le=600),
154
  min_conf: float = Query(default=0.0, ge=0.0, le=1.0),
155
  ):
 
156
  if not PDF_AVAILABLE:
157
  raise HTTPException(status_code=500, detail="PDF support not available")
158
+
159
  try:
160
  contents = await file.read()
161
  pages = convert_from_bytes(contents, dpi=dpi)
162
  except Exception as e:
163
  raise HTTPException(status_code=400, detail=f"Invalid PDF file: {e}")
164
+
165
  detector, recognizer = get_models()
166
+
167
  all_results = []
168
  all_text = []
169
+
170
  for page_num, pil_img in enumerate(pages, start=1):
171
  img = cv2.cvtColor(np.array(pil_img.convert("RGB")), cv2.COLOR_RGB2BGR)
172
  page_results = process_image(img, detector, recognizer, min_conf)
173
+
 
174
  for item in page_results:
175
  item["page"] = page_num
176
+
177
  all_results.extend(page_results)
178
+
179
+ page_text = "\n".join([r["text"] for r in page_results])
180
  if page_text:
181
  all_text.append(f"--- Page {page_num} ---\n{page_text}")
182
+
183
  return {
184
  "pages": len(pages),
185
  "items": all_results,