Spaces:
Paused
Paused
Commit
·
0a7e5ec
0
Parent(s):
Initial commit
Browse files- Dockerfile +27 -0
- README.md +105 -0
- README_HF_Deploy.md +66 -0
- app.py +237 -0
- requirements.txt +10 -0
Dockerfile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies needed by OpenCV (libGL, glib, X11 client libs)
# and OpenMP. --no-install-recommends keeps the image slim; the apt cache is
# removed in the same layer so it never inflates the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the pip layer is cached across code-only rebuilds
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Expose port (7860 is the Hugging Face Spaces convention)
EXPOSE 7860

# Run the application
CMD ["python", "app.py"]
|
README.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: KYB Dots.OCR Text Extraction
|
| 3 |
+
emoji: 🔍
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
| 7 |
+
sdk_version: "0.0.0"
|
| 8 |
+
app_port: 7860
|
| 9 |
+
pinned: false
|
| 10 |
+
license: "private"
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# KYB Dots.OCR Text Extraction
|
| 14 |
+
|
| 15 |
+
This Hugging Face Space provides a FastAPI endpoint for text extraction from identity documents using Dots.OCR with ROI (Region of Interest) support.
|
| 16 |
+
|
| 17 |
+
## Features
|
| 18 |
+
|
| 19 |
+
- **Text Extraction**: Extract text from identity documents using Dots.OCR
|
| 20 |
+
- **ROI Support**: Process pre-cropped images or full images with ROI coordinates
|
| 21 |
+
- **Field Mapping**: Structured field extraction with confidence scores
|
| 22 |
+
- **MRZ Detection**: Machine Readable Zone data extraction
|
| 23 |
+
- **Standardized API**: Consistent response format for integration
|
| 24 |
+
|
| 25 |
+
## API Endpoints
|
| 26 |
+
|
| 27 |
+
### Health Check
|
| 28 |
+
```
|
| 29 |
+
GET /health
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
### Text Extraction
|
| 33 |
+
```
|
| 34 |
+
POST /v1/id/ocr
|
| 35 |
+
Content-Type: multipart/form-data
|
| 36 |
+
|
| 37 |
+
file: <image_file>
|
| 38 |
+
roi: {"x1": 0.0, "y1": 0.0, "x2": 1.0, "y2": 1.0} (optional)
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
## Response Format
|
| 42 |
+
|
| 43 |
+
```json
|
| 44 |
+
{
|
| 45 |
+
"request_id": "uuid",
|
| 46 |
+
"media_type": "image",
|
| 47 |
+
"processing_time": 0.456,
|
| 48 |
+
"detections": [
|
| 49 |
+
{
|
| 50 |
+
"mrz_data": {
|
| 51 |
+
"document_type": "TD3",
|
| 52 |
+
"issuing_country": "NLD",
|
| 53 |
+
"surname": "MULDER",
|
| 54 |
+
"given_names": "THOMAS",
|
| 55 |
+
"document_number": "NLD123456789",
|
| 56 |
+
"nationality": "NLD",
|
| 57 |
+
"date_of_birth": "1990-01-01",
|
| 58 |
+
"gender": "M",
|
| 59 |
+
"date_of_expiry": "2030-01-01",
|
| 60 |
+
"personal_number": "123456789",
|
| 61 |
+
"raw_mrz": "P<NLDMULDER<<THOMAS<<<<<<<<<<<<<<<<<<<<<<<<<",
|
| 62 |
+
"confidence": 0.95
|
| 63 |
+
},
|
| 64 |
+
"extracted_fields": {
|
| 65 |
+
"document_number": {
|
| 66 |
+
"field_name": "document_number",
|
| 67 |
+
"value": "NLD123456789",
|
| 68 |
+
"confidence": 0.92,
|
| 69 |
+
"source": "ocr"
|
| 70 |
+
},
|
| 71 |
+
"surname": {
|
| 72 |
+
"field_name": "surname",
|
| 73 |
+
"value": "MULDER",
|
| 74 |
+
"confidence": 0.96,
|
| 75 |
+
"source": "ocr"
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
]
|
| 80 |
+
}
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
## Usage
|
| 84 |
+
|
| 85 |
+
1. Upload an image file (JPEG, PNG)
|
| 86 |
+
2. Optionally provide ROI coordinates as JSON string
|
| 87 |
+
3. Receive structured field extraction results
|
| 88 |
+
|
| 89 |
+
## Environment Variables
|
| 90 |
+
|
| 91 |
+
- `HF_DOTS_MODEL_PATH`: Path to Dots.OCR model weights
|
| 92 |
+
- `HF_DOTS_CONFIDENCE_THRESHOLD`: Confidence threshold for field extraction
|
| 93 |
+
- `HF_DOTS_DEVICE`: Device to use (auto, cpu, cuda)
|
| 94 |
+
- `HF_DOTS_MAX_IMAGE_SIZE`: Maximum image size for processing
|
| 95 |
+
- `HF_DOTS_MRZ_ENABLED`: Enable MRZ detection
|
| 96 |
+
|
| 97 |
+
## Performance
|
| 98 |
+
|
| 99 |
+
- **GPU**: 300-900ms processing time
|
| 100 |
+
- **CPU**: 3-8s processing time
|
| 101 |
+
- **Memory**: ~6GB per instance
|
| 102 |
+
|
| 103 |
+
## Privacy
|
| 104 |
+
|
| 105 |
+
This endpoint processes images temporarily and does not store or log personal information. All field values are redacted in logs for privacy protection.
|
README_HF_Deploy.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dots-OCR Text Extraction Service - HF Deployment
|
| 2 |
+
|
| 3 |
+
## Quick Deploy to Hugging Face Spaces
|
| 4 |
+
|
| 5 |
+
### 1. Create HF Space
|
| 6 |
+
```bash
|
| 7 |
+
# Login to Hugging Face
|
| 8 |
+
huggingface-cli login
|
| 9 |
+
|
| 10 |
+
# Create a new Space
|
| 11 |
+
huggingface-cli repo create dots-ocr-idcard --type space --space_sdk docker --organization algoryn
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
### 2. Prepare for Deployment
|
| 15 |
+
```bash
|
| 16 |
+
# Clone the space locally
|
| 17 |
+
git clone https://huggingface.co/spaces/algoryn/dots-ocr-idcard
|
| 18 |
+
cd dots-ocr-idcard
|
| 19 |
+
|
| 20 |
+
# Copy required files
|
| 21 |
+
cp /path/to/kybtech-ml-pipelines/docker/hf/dots-ocr/* .
|
| 22 |
+
|
| 23 |
+
# Copy field extraction module (needed for structured extraction)
|
| 24 |
+
mkdir -p src/idcard_api
|
| 25 |
+
cp /path/to/kybtech-ml-pipelines/src/idcard_api/field_extraction.py src/idcard_api/
|
| 26 |
+
touch src/idcard_api/__init__.py
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
### 3. Create app.py if needed
|
| 30 |
+
The app.py file should already exist. Key features:
|
| 31 |
+
- `/health` endpoint for health checks
|
| 32 |
+
- `/v1/id/ocr` endpoint for text extraction
|
| 33 |
+
- Supports ROI (Region of Interest) cropping
|
| 34 |
+
- Structured field extraction
|
| 35 |
+
- MRZ extraction support
|
| 36 |
+
|
| 37 |
+
### 4. Push to HF
|
| 38 |
+
```bash
|
| 39 |
+
git add .
|
| 40 |
+
git commit -m "Deploy Dots-OCR text extraction service"
|
| 41 |
+
git push
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
### 5. Test the Deployment
|
| 45 |
+
Once deployed (usually takes 5-10 minutes), test with:
|
| 46 |
+
```bash
|
| 47 |
+
# Basic OCR test
|
| 48 |
+
curl -X POST https://algoryn-dots-ocr-idcard.hf.space/v1/id/ocr \
|
| 49 |
+
-H "Authorization: Bearer YOUR_HF_TOKEN" \
|
| 50 |
+
-F "file=@test_image.jpg"
|
| 51 |
+
|
| 52 |
+
# With ROI (region of interest)
|
| 53 |
+
curl -X POST https://algoryn-dots-ocr-idcard.hf.space/v1/id/ocr \
|
| 54 |
+
-H "Authorization: Bearer YOUR_HF_TOKEN" \
|
| 55 |
+
-F "file=@test_image.jpg" \
|
| 56 |
+
-F 'roi={"x1":0.1,"y1":0.1,"x2":0.9,"y2":0.9}'
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
## Environment Variables
|
| 60 |
+
No special environment variables needed. The service runs on port 7860 by default.
|
| 61 |
+
|
| 62 |
+
## Notes
|
| 63 |
+
- Service includes mock mode if Dots-OCR fails to load
|
| 64 |
+
- Health check available at `/health`
|
| 65 |
+
- Structured field extraction included
|
| 66 |
+
- MRZ parsing support built-in
|
app.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""HF Dots.OCR Text Extraction Endpoint
|
| 2 |
+
|
| 3 |
+
This FastAPI application provides a Hugging Face Space endpoint for Dots.OCR
|
| 4 |
+
text extraction with ROI support and standardized field extraction schema.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
+
import time
|
| 9 |
+
import uuid
|
| 10 |
+
import json
|
| 11 |
+
import re
|
| 12 |
+
from typing import List, Optional, Dict, Any
|
| 13 |
+
from contextlib import asynccontextmanager
|
| 14 |
+
|
| 15 |
+
import cv2
|
| 16 |
+
import numpy as np
|
| 17 |
+
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
| 18 |
+
from fastapi.responses import JSONResponse
|
| 19 |
+
from pydantic import BaseModel, Field
|
| 20 |
+
import torch
|
| 21 |
+
from PIL import Image
|
| 22 |
+
import io
|
| 23 |
+
import base64
|
| 24 |
+
|
| 25 |
+
# Dots.OCR imports
|
| 26 |
+
try:
|
| 27 |
+
from dots_ocr import DotsOCR
|
| 28 |
+
DOTS_OCR_AVAILABLE = True
|
| 29 |
+
except ImportError:
|
| 30 |
+
DOTS_OCR_AVAILABLE = False
|
| 31 |
+
logging.warning("Dots.OCR not available - using mock implementation")
|
| 32 |
+
|
| 33 |
+
# Import field extraction utilities
|
| 34 |
+
import sys
|
| 35 |
+
import os
|
| 36 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'src'))
|
| 37 |
+
from idcard_api.field_extraction import FieldExtractor
|
| 38 |
+
|
| 39 |
+
# Configure logging
|
| 40 |
+
logging.basicConfig(level=logging.INFO)
|
| 41 |
+
logger = logging.getLogger(__name__)
|
| 42 |
+
|
| 43 |
+
# Global model instance
|
| 44 |
+
dots_ocr_model = None
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class BoundingBox(BaseModel):
    """Normalized bounding box coordinates.

    All values are fractions of the image width/height in [0.0, 1.0];
    (x1, y1) is the top-left corner and (x2, y2) the bottom-right.
    Corner ordering is not validated here — `crop_image_by_roi` clamps
    degenerate boxes at crop time.
    """
    x1: float = Field(..., ge=0.0, le=1.0, description="Top-left x coordinate")
    y1: float = Field(..., ge=0.0, le=1.0, description="Top-left y coordinate")
    x2: float = Field(..., ge=0.0, le=1.0, description="Bottom-right x coordinate")
    y2: float = Field(..., ge=0.0, le=1.0, description="Bottom-right y coordinate")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class ExtractedField(BaseModel):
    """Individual extracted field with confidence and source.

    One value/confidence pair for a single standardized document field;
    `source` records where the value came from (e.g. 'ocr').
    """
    field_name: str = Field(..., description="Standardized field name")
    value: Optional[str] = Field(None, description="Extracted field value")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Extraction confidence")
    source: str = Field(..., description="Extraction source (e.g., 'ocr')")
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class ExtractedFields(BaseModel):
    """All extracted fields from identity document.

    Every attribute is optional: only the fields present in the OCR text
    are populated (by `FieldExtractor.extract_fields`); the rest stay None.
    """
    document_number: Optional[ExtractedField] = None
    document_type: Optional[ExtractedField] = None
    issuing_country: Optional[ExtractedField] = None
    issuing_authority: Optional[ExtractedField] = None
    surname: Optional[ExtractedField] = None
    given_names: Optional[ExtractedField] = None
    nationality: Optional[ExtractedField] = None
    date_of_birth: Optional[ExtractedField] = None
    gender: Optional[ExtractedField] = None
    place_of_birth: Optional[ExtractedField] = None
    date_of_issue: Optional[ExtractedField] = None
    date_of_expiry: Optional[ExtractedField] = None
    personal_number: Optional[ExtractedField] = None
    optional_data_1: Optional[ExtractedField] = None
    optional_data_2: Optional[ExtractedField] = None
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class MRZData(BaseModel):
    """Machine Readable Zone data.

    Values parsed from the document's MRZ lines by
    `FieldExtractor.extract_mrz`. Date formats are not validated by this
    model — presumably ISO dates per the README example; confirm against
    the extractor.
    """
    document_type: Optional[str] = Field(None, description="MRZ document type (TD1|TD2|TD3)")
    issuing_country: Optional[str] = Field(None, description="Issuing country code")
    surname: Optional[str] = Field(None, description="Surname from MRZ")
    given_names: Optional[str] = Field(None, description="Given names from MRZ")
    document_number: Optional[str] = Field(None, description="Document number from MRZ")
    nationality: Optional[str] = Field(None, description="Nationality code from MRZ")
    date_of_birth: Optional[str] = Field(None, description="Date of birth from MRZ")
    gender: Optional[str] = Field(None, description="Gender from MRZ")
    date_of_expiry: Optional[str] = Field(None, description="Date of expiry from MRZ")
    personal_number: Optional[str] = Field(None, description="Personal number from MRZ")
    raw_mrz: Optional[str] = Field(None, description="Raw MRZ text")
    confidence: float = Field(0.0, ge=0.0, le=1.0, description="MRZ extraction confidence")
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
class OCRDetection(BaseModel):
    """Single OCR detection result.

    `extracted_fields` is always present; `mrz_data` is only set when an
    MRZ was found in the OCR text.
    """
    mrz_data: Optional[MRZData] = Field(None, description="MRZ data if detected")
    extracted_fields: ExtractedFields = Field(..., description="Extracted field data")
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class OCRResponse(BaseModel):
    """OCR API response.

    Top-level payload returned by POST /v1/id/ocr; the current
    implementation always emits exactly one detection per request.
    """
    request_id: str = Field(..., description="Unique request identifier")
    media_type: str = Field(..., description="Media type processed")
    processing_time: float = Field(..., description="Processing time in seconds")
    detections: List[OCRDetection] = Field(..., description="List of OCR detections")
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# FieldExtractor is now imported from the shared module
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def crop_image_by_roi(image: np.ndarray, roi: BoundingBox) -> np.ndarray:
    """Return the sub-image selected by *roi*.

    The normalized ROI is scaled to pixel space and clamped to the image
    bounds, so a box that extends past an edge (or is degenerate) still
    yields a valid — possibly empty — crop.
    """
    height, width = image.shape[:2]

    # Scale normalized [0, 1] coordinates to pixel positions.
    left = int(roi.x1 * width)
    top = int(roi.y1 * height)
    right = int(roi.x2 * width)
    bottom = int(roi.y2 * height)

    # Clamp so that 0 <= left <= right <= width and 0 <= top <= bottom <= height.
    left = min(max(left, 0), width)
    top = min(max(top, 0), height)
    right = min(max(right, left), width)
    bottom = min(max(bottom, top), height)

    return image[top:bottom, left:right]
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager for model loading.

    Runs once at startup (before `yield`): loads the Dots.OCR model into
    the module-level `dots_ocr_model`, falling back to the string sentinel
    "mock" when the package is missing or loading fails. Code after
    `yield` runs at shutdown.
    """
    global dots_ocr_model

    logger.info("Loading Dots.OCR model...")
    try:
        if DOTS_OCR_AVAILABLE:
            # Load Dots.OCR model
            dots_ocr_model = DotsOCR()
            logger.info("Dots.OCR model loaded successfully")
        else:
            logger.warning("Dots.OCR not available - using mock implementation")
            dots_ocr_model = "mock"
    except Exception as e:
        logger.error(f"Failed to load Dots.OCR model: {e}")
        # Don't raise - allow mock mode for development
        dots_ocr_model = "mock"

    yield

    logger.info("Shutting down Dots.OCR endpoint...")
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# ASGI application instance. `lifespan` loads the Dots.OCR model once at
# startup (falling back to mock mode on failure) instead of per request.
app = FastAPI(
    title="KYB Dots.OCR Text Extraction",
    description="Dots.OCR for identity document text extraction with ROI support",
    version="1.0.0",
    lifespan=lifespan
)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
@app.get("/health")
async def health_check():
    """Liveness probe: report service status and API version."""
    return dict(status="healthy", version="1.0.0")
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
@app.post("/v1/id/ocr", response_model=OCRResponse)
async def extract_text(
    file: UploadFile = File(..., description="Image file to process"),
    roi: Optional[str] = Form(None, description="ROI coordinates as JSON string")
):
    """Extract text from identity document image.

    Args:
        file: Uploaded image (JPEG/PNG). Decoded with Pillow and converted
            to a BGR numpy array for OpenCV-based processing.
        roi: Optional JSON string with normalized coordinates
            {"x1": ..., "y1": ..., "x2": ..., "y2": ...}. When valid, the
            image is cropped to this region before OCR; an invalid ROI is
            logged and ignored (best-effort, preserving prior behavior).

    Returns:
        OCRResponse with a single detection holding the structured fields
        and MRZ data (if any) parsed from the OCR text.

    Raises:
        HTTPException: 503 if the model is not loaded, 400 if the upload
            cannot be decoded as an image, 500 on unexpected OCR failure.
    """
    if dots_ocr_model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    start_time = time.time()
    request_id = str(uuid.uuid4())

    try:
        # Read and decode the image. convert("RGB") normalizes RGBA,
        # grayscale, and palette inputs so the RGB2BGR conversion below
        # cannot fail on a non-3-channel array.
        image_data = await file.read()
        try:
            image = Image.open(io.BytesIO(image_data)).convert("RGB")
        except Exception as e:
            # Undecodable upload is a client error, not a server failure.
            raise HTTPException(status_code=400, detail=f"Invalid image file: {e}")
        image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Parse ROI if provided; a malformed ROI is ignored rather than
        # rejected so callers get a full-image result.
        if roi:
            try:
                roi_bbox = BoundingBox(**json.loads(roi))
                image_cv = crop_image_by_roi(image_cv, roi_bbox)
            except Exception as e:
                logger.warning(f"Invalid ROI provided: {e}")

        # Run OCR: real model when loaded, fixed mock text otherwise.
        if DOTS_OCR_AVAILABLE and dots_ocr_model != "mock":
            ocr_results = dots_ocr_model(image_cv)
            ocr_text = " ".join([result.text for result in ocr_results])
        else:
            ocr_text = "MOCK OCR TEXT - Document Number: NLD123456789 Surname: MULDER Given Names: THOMAS"
            logger.info("Using mock OCR implementation")

        # Structured field and MRZ extraction from the raw OCR text.
        extracted_fields = FieldExtractor.extract_fields(ocr_text)
        mrz_data = FieldExtractor.extract_mrz(ocr_text)

        detection = OCRDetection(
            mrz_data=mrz_data,
            extracted_fields=extracted_fields,
        )

        return OCRResponse(
            request_id=request_id,
            media_type="image",
            processing_time=time.time() - start_time,
            detections=[detection],
        )

    except HTTPException:
        # Propagate deliberate HTTP errors (e.g. the 400 above) unchanged
        # instead of re-wrapping them as 500s.
        raise
    except Exception as e:
        logger.error(f"OCR extraction failed: {e}")
        raise HTTPException(status_code=500, detail=f"OCR extraction failed: {str(e)}")
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
if __name__ == "__main__":
    # Port 7860 is the Hugging Face Spaces convention (matches the README's
    # app_port and the Dockerfile's EXPOSE).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.112.1
|
| 2 |
+
uvicorn[standard]==0.30.6
|
| 3 |
+
python-multipart==0.0.9
|
| 4 |
+
pydantic==2.0.0
|
| 5 |
+
opencv-python>=4.9.0.80
|
| 6 |
+
numpy>=1.26.0
|
| 7 |
+
pillow>=10.3.0
|
| 8 |
+
torch>=2.2.0
|
| 9 |
+
torchvision>=0.17.0
|
| 10 |
+
dots-ocr>=0.1.0
|