Spaces:
Sleeping
Sleeping
File size: 4,351 Bytes
f871fed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
"""
OCR API Router
Endpoints for image processing and text extraction.
"""
from typing import Optional
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from pydantic import BaseModel
from loguru import logger
from open_notebook.services.ocr_service import ocr_service, OCRResult, StructuredNote
# Router collecting every OCR endpoint in this module; each route declared on
# it is mounted under the "/ocr" path prefix and tagged "ocr".
router = APIRouter(prefix="/ocr", tags=["ocr"])
# ============================================================================
# Request/Response Models
# ============================================================================
class OCRBase64Request(BaseModel):
    """Request for OCR with base64 encoded image."""

    # Image payload as a base64 string; it is handed unchanged to
    # ``ocr_service.process_image_base64`` by the ``/process`` endpoint.
    image: str  # Base64 encoded image
    # When true (the default) and OCR produced non-empty text, the endpoint
    # additionally asks the OCR service to structure that text.
    structure: bool = True  # Whether to structure the result
class OCRResponse(BaseModel):
    """Response from OCR processing."""

    # Text extracted from the image, copied from the OCR service result.
    raw_text: str
    # Confidence reported by the OCR service; may be absent.
    # NOTE(review): scale/range is defined by the service — confirm there.
    confidence: Optional[float] = None
    # Processing duration reported by the OCR service (milliseconds, per the
    # field name).
    processing_time_ms: int
    # Source image format as reported by the OCR service.
    source_format: str
    # Structured version of ``raw_text``; None when structuring was not
    # requested, the text was empty, or structuring failed.
    structured: Optional[StructuredNote] = None
# ============================================================================
# Endpoints
# ============================================================================
@router.post("/process", response_model=OCRResponse)
async def process_image(request: OCRBase64Request) -> OCRResponse:
    """
    Process a base64 encoded image and extract text using OCR.

    Supports PNG, JPEG, and other common image formats.
    Optionally structures the extracted text using LLM.

    Args:
        request: Body carrying the base64 image and the ``structure`` flag.

    Returns:
        The OCR result; ``structured`` is filled only when structuring was
        requested, text was found, and structuring succeeded.

    Raises:
        HTTPException: 400 when the image payload is empty, 503 when the OCR
            service raises ``RuntimeError``, 500 for any other failure.
    """
    # An empty payload can never decode to an image, so reject it as a client
    # error. The check sits outside the ``try`` below because the broad
    # ``except Exception`` there would otherwise turn this 400 into a 500.
    if not request.image.strip():
        raise HTTPException(status_code=400, detail="Image data is empty")

    try:
        # NOTE(review): this is a synchronous call inside an async handler; if
        # the service does heavy work here it blocks the event loop — consider
        # offloading it to a threadpool after confirming the service allows it.
        result = ocr_service.process_image_base64(request.image)

        # Structuring is best-effort: a failure is logged and the raw OCR
        # text is still returned.
        structured = None
        if request.structure and result.raw_text:
            try:
                structured = await ocr_service.structure_text(result.raw_text)
            except Exception as e:
                logger.warning(f"Failed to structure OCR text: {e}")

        return OCRResponse(
            raw_text=result.raw_text,
            confidence=result.confidence,
            processing_time_ms=result.processing_time_ms,
            source_format=result.source_format,
            structured=structured,
        )
    except RuntimeError as e:
        # RuntimeError from the OCR service is reported as "service
        # unavailable"; log it so 503 responses are traceable.
        logger.warning(f"OCR request rejected with 503: {e}")
        raise HTTPException(status_code=503, detail=str(e)) from e
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"OCR processing failed: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"OCR processing failed: {str(e)}",
        ) from e
# Content types accepted by the upload endpoint. Kept as an ordered tuple
# because the same order is echoed in the 400 error message.
_ALLOWED_IMAGE_TYPES = (
    "image/png",
    "image/jpeg",
    "image/jpg",
    "image/gif",
    "image/webp",
    "image/bmp",
)


@router.post("/upload", response_model=OCRResponse)
async def process_uploaded_image(
    file: UploadFile = File(...),
    structure: bool = Form(True),
) -> OCRResponse:
    """
    Process an uploaded image file and extract text using OCR.

    Supports PNG, JPEG, and other common image formats.

    Args:
        file: The uploaded image (multipart form field).
        structure: Whether to also structure the extracted text.

    Returns:
        The OCR result; ``structured`` is filled only when structuring was
        requested, text was found, and structuring succeeded.

    Raises:
        HTTPException: 400 for a disallowed content type or an empty file,
            503 when the OCR service raises ``RuntimeError``, 500 for any
            other failure.
    """
    # Normalize before comparing: the declared type may be missing, use
    # different casing, or carry parameters (e.g. "image/png; name=x").
    content_type = (file.content_type or "").split(";", 1)[0].strip().lower()
    if content_type not in _ALLOWED_IMAGE_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid file type. Allowed types: {', '.join(_ALLOWED_IMAGE_TYPES)}",
        )

    try:
        contents = await file.read()
        # A zero-byte upload cannot contain an image: report a client error
        # instead of letting the OCR step fail with a 500.
        if not contents:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # NOTE(review): this is a synchronous call inside an async handler; if
        # the service does heavy work here it blocks the event loop — consider
        # offloading it to a threadpool after confirming the service allows it.
        result = ocr_service.process_image_bytes(contents)

        # Structuring is best-effort: a failure is logged and the raw OCR
        # text is still returned.
        structured = None
        if structure and result.raw_text:
            try:
                structured = await ocr_service.structure_text(result.raw_text)
            except Exception as e:
                logger.warning(f"Failed to structure OCR text: {e}")

        return OCRResponse(
            raw_text=result.raw_text,
            confidence=result.confidence,
            processing_time_ms=result.processing_time_ms,
            source_format=result.source_format,
            structured=structured,
        )
    except HTTPException:
        # Let deliberate HTTP errors (the empty-file 400) pass through instead
        # of being rewrapped as a 500 by the catch-all below.
        raise
    except RuntimeError as e:
        # RuntimeError from the OCR service is reported as "service
        # unavailable"; log it so 503 responses are traceable.
        logger.warning(f"OCR request rejected with 503: {e}")
        raise HTTPException(status_code=503, detail=str(e)) from e
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"OCR processing failed: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"OCR processing failed: {str(e)}",
        ) from e
@router.get("/status")
async def get_ocr_status():
    """Report whether the OCR backend is usable, with a human-readable note."""
    is_ready = ocr_service.tesseract_available
    if is_ready:
        status_text = "OCR service is ready"
    else:
        status_text = "Tesseract is not installed. OCR functionality is unavailable."
    return {"available": is_ready, "message": status_text}
|