Spaces:

hkai20000
/

ocrAPP

Sleeping

App Files Files Community

hkai20000 committed on Jan 31

Commit

f66fde9

verified ·

1 Parent(s): d6e33e8

Update main.py

Browse files

Files changed (1) hide show

main.py +305 -299

main.py CHANGED Viewed

@@ -1,299 +1,305 @@
-from fastapi import FastAPI, UploadFile, File, Form
-from fastapi.responses import JSONResponse
-from fastapi.middleware.cors import CORSMiddleware
-from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
-from doctr.io import DocumentFile
-from doctr.models import ocr_predictor
-import cv2
-import numpy as np
-from PIL import Image
-import io
-from typing import Dict, Any, Optional
-app = FastAPI(title="ScanAssured OCR & NER API")
-# Enable CORS for Flutter app
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# --- OCR PRESETS ---
-OCR_PRESETS = {
-    "high_accuracy": {
-        "det": "db_resnet50",
-        "reco": "crnn_vgg16_bn",
-        "name": "High Accuracy",
-        "description": "Best quality, slower processing"
-    },
-    "balanced": {
-        "det": "db_resnet50",
-        "reco": "crnn_mobilenet_v3_small",
-        "name": "Balanced (Recommended)",
-        "description": "Good quality and speed"
-    },
-    "fast": {
-        "det": "db_mobilenet_v3_large",
-        "reco": "crnn_mobilenet_v3_small",
-        "name": "Fast",
-        "description": "Fastest processing, slightly lower quality"
-    },
-}
-OCR_DETECTION_MODELS = ["db_resnet50", "db_mobilenet_v3_large", "linknet_resnet18"]
-OCR_RECOGNITION_MODELS = ["crnn_vgg16_bn", "crnn_mobilenet_v3_small", "parseq"]
-# --- NER MODELS ---
-NER_MODELS = {
-    "alvaroalon2/biobert_chemical_ner": {
-        "name": "Chemicals & Diseases",
-        "description": "Identifies chemical compounds and disease names",
-        "entities": ["CHEM", "DIS"]
-    },
-    "d4data/biomedical-ner-all": {
-        "name": "Comprehensive Biomedical",
-        "description": "80+ biomedical entity types including genes, proteins, cells",
-        "entities": ["GENE", "PROTEIN", "CELL", "DISEASE", "CHEMICAL", "SPECIES", "PATHWAY"]
-    },
-    "samrawal/bert-base-uncased_clinical-ner": {
-        "name": "Clinical Notes",
-        "description": "Optimized for clinical/medical notes",
-        "entities": ["PROBLEM", "TREATMENT", "TEST"]
-    },
-    "ukkendane/bert-finetuned-ner-bio": {
-        "name": "Biomedical General",
-        "description": "General biomedical entities from research papers",
-        "entities": ["GENE", "PROTEIN", "DNA", "RNA", "CELL_LINE", "CELL_TYPE"]
-    },
-}
-# --- GLOBAL MODEL CACHES ---
-ner_model_cache: Dict[str, Any] = {}
-ocr_model_cache: Dict[str, Any] = {}
-# --- OCR MODEL LOADING ---
-def get_ocr_predictor(det_arch: str, reco_arch: str):
-    """Retrieves a loaded OCR predictor from cache or loads it if necessary."""
-    cache_key = f"{det_arch}_{reco_arch}"
-    if cache_key in ocr_model_cache:
-        print(f"Using cached OCR model: {cache_key}")
-        return ocr_model_cache[cache_key]
-    try:
-        print(f"Loading OCR model: det={det_arch}, reco={reco_arch}...")
-        predictor = ocr_predictor(
-            det_arch=det_arch,
-            reco_arch=reco_arch,
-            pretrained=True,
-            assume_straight_pages=True
-        )
-        ocr_model_cache[cache_key] = predictor
-        print(f"OCR model {cache_key} loaded successfully!")
-        return predictor
-    except Exception as e:
-        print(f"ERROR: Failed to load OCR model {cache_key}: {e}")
-        return None
-# --- NER MODEL LOADING ---
-def get_ner_pipeline(model_id: str):
-    """Retrieves a loaded NER pipeline from cache or loads it if necessary."""
-    if model_id not in NER_MODELS:
-        raise ValueError(f"Unknown NER model ID: {model_id}")
-    if model_id in ner_model_cache:
-        print(f"Using cached NER model: {model_id}")
-        return ner_model_cache[model_id]
-    try:
-        print(f"Loading NER model: {model_id}...")
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        model = AutoModelForTokenClassification.from_pretrained(model_id)
-        ner_pipeline = pipeline(
-            "ner",
-            model=model,
-            tokenizer=tokenizer,
-            aggregation_strategy="simple"
-        )
-        ner_model_cache[model_id] = ner_pipeline
-        print(f"NER model {model_id} loaded successfully!")
-        return ner_pipeline
-    except Exception as e:
-        print(f"ERROR: Failed to load NER model {model_id}: {e}")
-        return None
-# --- IMAGE PREPROCESSING ---
-def deskew_image(image: np.ndarray) -> np.ndarray:
-    """Deskew image using projection profile method."""
-    try:
-        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
-        edges = cv2.Canny(gray, 50, 150, apertureSize=3)
-        lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
-        if lines is not None and len(lines) > 0:
-            angles = []
-            for line in lines:
-                x1, y1, x2, y2 = line[0]
-                angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
-                if abs(angle) < 45:
-                    angles.append(angle)
-            if angles:
-                median_angle = np.median(angles)
-                if abs(median_angle) > 0.5:
-                    (h, w) = image.shape[:2]
-                    center = (w // 2, h // 2)
-                    M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
-                    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
-                    return rotated
-        return image
-    except Exception as e:
-        print(f"Deskew warning: {e}")
-        return image
-def preprocess_for_doctr(file_content: bytes) -> np.ndarray:
-    """Automatic preprocessing pipeline optimized for docTR."""
-    nparr = np.frombuffer(file_content, np.uint8)
-    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-    if img is None:
-        raise ValueError("Failed to decode image")
-    img = deskew_image(img)
-    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
-    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-    lab[:, :, 0] = clahe.apply(lab[:, :, 0])
-    img = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
-    img = cv2.fastNlMeansDenoisingColored(img, None, 6, 6, 7, 21)
-    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    return img
-def basic_cleanup(text: str) -> str:
-    """Clean up OCR text for NER processing."""
-    text = " ".join(text.split())
-    return text
-# --- FastAPI Routes ---
-@app.get("/")
-async def root():
-    """Health check endpoint."""
-    return {"status": "running", "message": "ScanAssured OCR & NER API"}
-@app.get("/models")
-async def get_available_models():
-    """Return all available OCR and NER models."""
-    return {
-        "ocr_presets": [
-            {
-                "id": preset_id,
-                "name": preset_data["name"],
-                "description": preset_data["description"]
-            }
-            for preset_id, preset_data in OCR_PRESETS.items()
-        ],
-        "ocr_detection_models": OCR_DETECTION_MODELS,
-        "ocr_recognition_models": OCR_RECOGNITION_MODELS,
-        "ner_models": {
-            model_id: {
-                "name": model_data["name"],
-                "description": model_data["description"],
-                "entities": model_data["entities"]
-            }
-            for model_id, model_data in NER_MODELS.items()
-        }
-    }
-@app.post("/process")
-async def process_image(
-    file: UploadFile = File(...),
-    ner_model_id: str = Form(...),
-    ocr_preset: str = Form("balanced"),
-    ocr_det_model: Optional[str] = Form(None),
-    ocr_reco_model: Optional[str] = Form(None),
-):
-    """Process an image with OCR and NER."""
-    # Determine OCR models
-    if ocr_det_model and ocr_reco_model:
-        det_arch = ocr_det_model
-        reco_arch = ocr_reco_model
-    else:
-        preset = OCR_PRESETS.get(ocr_preset, OCR_PRESETS["balanced"])
-        det_arch = preset["det"]
-        reco_arch = preset["reco"]
-    # Validate NER model
-    if ner_model_id not in NER_MODELS:
-        return JSONResponse(
-            status_code=400,
-            content={"detail": f"Unknown NER model: {ner_model_id}"}
-        )
-    # Get OCR predictor
-    ocr_predictor_instance = get_ocr_predictor(det_arch, reco_arch)
-    if not ocr_predictor_instance:
-        return JSONResponse(
-            status_code=503,
-            content={"detail": f"Failed to load OCR model: {det_arch}/{reco_arch}"}
-        )
-    # Get NER pipeline
-    ner_pipeline = get_ner_pipeline(ner_model_id)
-    if not ner_pipeline:
-        return JSONResponse(
-            status_code=503,
-            content={"detail": f"Failed to load NER model: {ner_model_id}"}
-        )
-    try:
-        # Read and preprocess image
-        file_content = await file.read()
-        preprocessed_img = preprocess_for_doctr(file_content)
-        # Perform OCR with docTR
-        print("Running docTR OCR...")
-        doc = DocumentFile.from_images([preprocessed_img])
-        result = ocr_predictor_instance(doc)
-        raw_text = result.render()
-        cleaned_text = basic_cleanup(raw_text)
-        print(f"OCR Text (first 200 chars): {cleaned_text[:200]}...")
-        # Perform NER
-        print("Running NER...")
-        entities = ner_pipeline(cleaned_text)
-        # Filter and structure entities
-        structured_entities = []
-        for entity in entities:
-            if entity.get('score', 0.0) > 0.6:
-                structured_entities.append({
-                    'entity_group': entity['entity_group'],
-                    'score': float(entity['score']),
-                    'word': entity['word'].strip(),
-                })
-        return {
-            "cleaned_text": cleaned_text,
-            "medical_entities": structured_entities,
-            "model_id": NER_MODELS[ner_model_id]["name"],
-            "ocr_model": f"{det_arch} + {reco_arch}"
-        }
-    except Exception as e:
-        print(f"Processing error: {e}")
-        import traceback
-        traceback.print_exc()
-        return JSONResponse(
-            status_code=500,
-            content={"detail": f"An error occurred during processing: {str(e)}"}
-        )

+from fastapi import FastAPI, UploadFile, File, Form
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
+from doctr.io import DocumentFile
+from doctr.models import ocr_predictor
+import cv2
+import numpy as np
+from PIL import Image
+import io
+from typing import Dict, Any, Optional
+app = FastAPI(title="ScanAssured OCR & NER API")
+# Enable CORS for Flutter app
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# --- OCR PRESETS ---
+OCR_PRESETS = {
+    "high_accuracy": {
+        "det": "db_resnet50",
+        "reco": "crnn_vgg16_bn",
+        "name": "High Accuracy",
+        "description": "Best quality, slower processing"
+    },
+    "balanced": {
+        "det": "db_resnet50",
+        "reco": "crnn_mobilenet_v3_small",
+        "name": "Balanced (Recommended)",
+        "description": "Good quality and speed"
+    },
+    "fast": {
+        "det": "db_mobilenet_v3_large",
+        "reco": "crnn_mobilenet_v3_small",
+        "name": "Fast",
+        "description": "Fastest processing, slightly lower quality"
+    },
+}
+OCR_DETECTION_MODELS = ["db_resnet50", "db_mobilenet_v3_large", "linknet_resnet18"]
+OCR_RECOGNITION_MODELS = ["crnn_vgg16_bn", "crnn_mobilenet_v3_small", "parseq"]
+# --- NER MODELS ---
+NER_MODELS = {
+    "alvaroalon2/biobert_chemical_ner": {
+        "name": "Chemicals & Diseases",
+        "description": "Identifies chemical compounds and disease names",
+        "entities": ["CHEM", "DIS"]
+    },
+    "d4data/biomedical-ner-all": {
+        "name": "Comprehensive Biomedical",
+        "description": "80+ biomedical entity types including genes, proteins, cells",
+        "entities": ["GENE", "PROTEIN", "CELL", "DISEASE", "CHEMICAL", "SPECIES", "PATHWAY"]
+    },
+    "samrawal/bert-base-uncased_clinical-ner": {
+        "name": "Clinical Notes",
+        "description": "Optimized for clinical/medical notes",
+        "entities": ["PROBLEM", "TREATMENT", "TEST"]
+    },
+    "ukkendane/bert-finetuned-ner-bio": {
+        "name": "Biomedical General",
+        "description": "General biomedical entities from research papers",
+        "entities": ["GENE", "PROTEIN", "DNA", "RNA", "CELL_LINE", "CELL_TYPE"]
+    },
+}
+# --- GLOBAL MODEL CACHES ---
+ner_model_cache: Dict[str, Any] = {}
+ocr_model_cache: Dict[str, Any] = {}
+# --- OCR MODEL LOADING ---
+def get_ocr_predictor(det_arch: str, reco_arch: str):
+    """Retrieves a loaded OCR predictor from cache or loads it if necessary."""
+    cache_key = f"{det_arch}_{reco_arch}"
+    if cache_key in ocr_model_cache:
+        print(f"Using cached OCR model: {cache_key}")
+        return ocr_model_cache[cache_key]
+    try:
+        print(f"Loading OCR model: det={det_arch}, reco={reco_arch}...")
+        predictor = ocr_predictor(
+            det_arch=det_arch,
+            reco_arch=reco_arch,
+            pretrained=True,
+            assume_straight_pages=True
+        )
+        ocr_model_cache[cache_key] = predictor
+        print(f"OCR model {cache_key} loaded successfully!")
+        return predictor
+    except Exception as e:
+        print(f"ERROR: Failed to load OCR model {cache_key}: {e}")
+        return None
+# --- NER MODEL LOADING ---
+def get_ner_pipeline(model_id: str):
+    """Retrieves a loaded NER pipeline from cache or loads it if necessary."""
+    if model_id not in NER_MODELS:
+        raise ValueError(f"Unknown NER model ID: {model_id}")
+    if model_id in ner_model_cache:
+        print(f"Using cached NER model: {model_id}")
+        return ner_model_cache[model_id]
+    try:
+        print(f"Loading NER model: {model_id}...")
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        model = AutoModelForTokenClassification.from_pretrained(model_id)
+        ner_pipeline = pipeline(
+            "ner",
+            model=model,
+            tokenizer=tokenizer,
+            aggregation_strategy="simple"
+        )
+        ner_model_cache[model_id] = ner_pipeline
+        print(f"NER model {model_id} loaded successfully!")
+        return ner_pipeline
+    except Exception as e:
+        print(f"ERROR: Failed to load NER model {model_id}: {e}")
+        return None
+# --- IMAGE PREPROCESSING ---
+def deskew_image(image: np.ndarray) -> np.ndarray:
+    """Deskew image using the median angle of near-horizontal Hough line segments."""
+    try:
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
+        edges = cv2.Canny(gray, 50, 150, apertureSize=3)
+        lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
+        if lines is not None and len(lines) > 0:
+            angles = []
+            for line in lines:
+                x1, y1, x2, y2 = line[0]
+                angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
+                if abs(angle) < 45:
+                    angles.append(angle)
+            if angles:
+                median_angle = np.median(angles)
+                if abs(median_angle) > 0.5:
+                    (h, w) = image.shape[:2]
+                    center = (w // 2, h // 2)
+                    M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
+                    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
+                    return rotated
+        return image
+    except Exception as e:
+        print(f"Deskew warning: {e}")
+        return image
+def preprocess_for_doctr(file_content: bytes) -> np.ndarray:
+    """Automatic preprocessing pipeline optimized for docTR."""
+    nparr = np.frombuffer(file_content, np.uint8)
+    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    if img is None:
+        raise ValueError("Failed to decode image")
+    img = deskew_image(img)
+    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
+    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+    lab[:, :, 0] = clahe.apply(lab[:, :, 0])
+    img = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
+    img = cv2.fastNlMeansDenoisingColored(img, None, 6, 6, 7, 21)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    return img
+def basic_cleanup(text: str) -> str:
+    """Clean up OCR text for NER processing."""
+    text = " ".join(text.split())
+    return text
+# --- FastAPI Routes ---
+@app.get("/")
+async def root():
+    """Health check endpoint."""
+    return {"status": "running", "message": "ScanAssured OCR & NER API"}
+@app.get("/models")
+async def get_available_models():
+    """Return all available OCR and NER models."""
+    return {
+        "ocr_presets": [
+            {
+                "id": preset_id,
+                "name": preset_data["name"],
+                "description": preset_data["description"]
+            }
+            for preset_id, preset_data in OCR_PRESETS.items()
+        ],
+        "ocr_detection_models": OCR_DETECTION_MODELS,
+        "ocr_recognition_models": OCR_RECOGNITION_MODELS,
+        "ner_models": {
+            model_id: {
+                "name": model_data["name"],
+                "description": model_data["description"],
+                "entities": model_data["entities"]
+            }
+            for model_id, model_data in NER_MODELS.items()
+        }
+    }
+@app.post("/process")
+async def process_image(
+    file: UploadFile = File(...),
+    ner_model_id: str = Form(...),
+    ocr_preset: str = Form("balanced"),
+    ocr_det_model: Optional[str] = Form(None),
+    ocr_reco_model: Optional[str] = Form(None),
+):
+    """Process an image with OCR and NER."""
+    # Determine OCR models
+    if ocr_det_model and ocr_reco_model:
+        det_arch = ocr_det_model
+        reco_arch = ocr_reco_model
+    else:
+        preset = OCR_PRESETS.get(ocr_preset, OCR_PRESETS["balanced"])
+        det_arch = preset["det"]
+        reco_arch = preset["reco"]
+    # Validate NER model
+    if ner_model_id not in NER_MODELS:
+        return JSONResponse(
+            status_code=400,
+            content={"detail": f"Unknown NER model: {ner_model_id}"}
+        )
+    # Get OCR predictor
+    ocr_predictor_instance = get_ocr_predictor(det_arch, reco_arch)
+    if not ocr_predictor_instance:
+        return JSONResponse(
+            status_code=503,
+            content={"detail": f"Failed to load OCR model: {det_arch}/{reco_arch}"}
+        )
+    # Get NER pipeline
+    ner_pipeline = get_ner_pipeline(ner_model_id)
+    if not ner_pipeline:
+        return JSONResponse(
+            status_code=503,
+            content={"detail": f"Failed to load NER model: {ner_model_id}"}
+        )
+    try:
+        # Read and preprocess image
+        file_content = await file.read()
+        preprocessed_img = preprocess_for_doctr(file_content)
+        # Perform OCR with docTR
+        print("Running docTR OCR...")
+        # Convert numpy array to bytes for docTR
+        pil_img = Image.fromarray(preprocessed_img)
+        img_byte_arr = io.BytesIO()
+        pil_img.save(img_byte_arr, format='PNG')
+        img_bytes = img_byte_arr.getvalue()
+        doc = DocumentFile.from_images([img_bytes])
+        result = ocr_predictor_instance(doc)
+        raw_text = result.render()
+        cleaned_text = basic_cleanup(raw_text)
+        print(f"OCR Text (first 200 chars): {cleaned_text[:200]}...")
+        # Perform NER
+        print("Running NER...")
+        entities = ner_pipeline(cleaned_text)
+        # Filter and structure entities
+        structured_entities = []
+        for entity in entities:
+            if entity.get('score', 0.0) > 0.6:
+                structured_entities.append({
+                    'entity_group': entity['entity_group'],
+                    'score': float(entity['score']),
+                    'word': entity['word'].strip(),
+                })
+        return {
+            "cleaned_text": cleaned_text,
+            "medical_entities": structured_entities,
+            "model_id": NER_MODELS[ner_model_id]["name"],
+            "ocr_model": f"{det_arch} + {reco_arch}"
+        }
+    except Exception as e:
+        print(f"Processing error: {e}")
+        import traceback
+        traceback.print_exc()
+        return JSONResponse(
+            status_code=500,
+            content={"detail": f"An error occurred during processing: {str(e)}"}
+        )