File size: 4,351 Bytes
f871fed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
OCR API Router

Endpoints for image processing and text extraction.
"""

from typing import Optional
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from pydantic import BaseModel
from loguru import logger

from open_notebook.services.ocr_service import ocr_service, OCRResult, StructuredNote


# Router for the OCR endpoints: every route registered on it is served
# under the "/ocr" path prefix and tagged "ocr".
router = APIRouter(prefix="/ocr", tags=["ocr"])


# ============================================================================
# Request/Response Models
# ============================================================================

class OCRBase64Request(BaseModel):
    """Request for OCR with base64 encoded image."""
    # Base64 encoded image. The /process endpoint hands this string as-is to
    # ocr_service.process_image_base64; no validation happens in this model.
    image: str
    # Whether to structure the result. When True (the default) and OCR
    # produced non-empty text, the endpoint additionally calls
    # ocr_service.structure_text on that text.
    structure: bool = True


class OCRResponse(BaseModel):
    """Response from OCR processing."""
    # Text extracted by OCR, copied from the service result's raw_text.
    raw_text: str
    # Copied from the service result's confidence; may be None.
    # NOTE(review): the scale (0-1 vs 0-100) is not visible here — confirm
    # against ocr_service.
    confidence: Optional[float] = None
    # Copied from the service result; presumably elapsed OCR time in
    # milliseconds (going by the name) — confirm against ocr_service.
    processing_time_ms: int
    # Copied from the service result's source_format.
    # NOTE(review): presumably the detected image format — confirm.
    source_format: str
    # Output of ocr_service.structure_text. Stays None when structuring was
    # not requested, the extracted text was empty, or structuring failed
    # (the endpoints log a warning and continue in that case).
    structured: Optional[StructuredNote] = None


# ============================================================================
# Endpoints
# ============================================================================

@router.post("/process", response_model=OCRResponse)
async def process_image(request: OCRBase64Request) -> OCRResponse:
    """
    Process a base64 encoded image and extract text using OCR.

    Supports PNG, JPEG, and other common image formats.
    Optionally structures the extracted text using LLM.

    Raises:
        HTTPException: 503 when a ``RuntimeError`` is raised during
            processing; 500 for any other failure.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    try:
        # process_image_base64 is a synchronous call. Run it in the worker
        # thread pool so the event loop keeps serving other requests while
        # OCR is in progress.
        result = await run_in_threadpool(
            ocr_service.process_image_base64, request.image
        )

        # Structuring is best-effort: a failure here is logged and the raw
        # OCR text is still returned.
        structured = None
        if request.structure and result.raw_text:
            try:
                structured = await ocr_service.structure_text(result.raw_text)
            except Exception as e:
                logger.warning(f"Failed to structure OCR text: {e}")

        return OCRResponse(
            raw_text=result.raw_text,
            confidence=result.confidence,
            processing_time_ms=result.processing_time_ms,
            source_format=result.source_format,
            structured=structured,
        )

    except RuntimeError as e:
        raise HTTPException(status_code=503, detail=str(e)) from e
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"OCR processing failed: {e}")
        raise HTTPException(
            status_code=500, detail=f"OCR processing failed: {str(e)}"
        ) from e


@router.post("/upload", response_model=OCRResponse)
async def process_uploaded_image(
    file: UploadFile = File(...),
    structure: bool = Form(True),
) -> OCRResponse:
    """
    Process an uploaded image file and extract text using OCR.

    Supports PNG, JPEG, and other common image formats.

    Raises:
        HTTPException: 400 when the content type is not an allowed image
            type or the uploaded file is empty; 503 when a ``RuntimeError``
            is raised during processing; 500 for any other failure.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    # Validate file type. Kept as an ordered tuple because the order is
    # reproduced in the error message below.
    allowed_types = (
        "image/png",
        "image/jpeg",
        "image/jpg",
        "image/gif",
        "image/webp",
        "image/bmp",
    )
    # The declared content type may be missing, differ in case, or carry
    # parameters ("image/png; charset=binary"); compare the bare media type.
    media_type = (file.content_type or "").split(";", 1)[0].strip().lower()
    if media_type not in allowed_types:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid file type. Allowed types: {', '.join(allowed_types)}"
        )

    try:
        # Read file
        contents = await file.read()
        if not contents:
            # An empty upload is a client error, not an OCR failure.
            raise HTTPException(status_code=400, detail="Uploaded file is empty.")

        # process_image_bytes is a synchronous call. Run it in the worker
        # thread pool so the event loop keeps serving other requests while
        # OCR is in progress.
        result = await run_in_threadpool(ocr_service.process_image_bytes, contents)

        # Structuring is best-effort: a failure here is logged and the raw
        # OCR text is still returned.
        structured = None
        if structure and result.raw_text:
            try:
                structured = await ocr_service.structure_text(result.raw_text)
            except Exception as e:
                logger.warning(f"Failed to structure OCR text: {e}")

        return OCRResponse(
            raw_text=result.raw_text,
            confidence=result.confidence,
            processing_time_ms=result.processing_time_ms,
            source_format=result.source_format,
            structured=structured,
        )

    except HTTPException:
        # Client errors raised above must not be rewrapped as a 500.
        raise
    except RuntimeError as e:
        raise HTTPException(status_code=503, detail=str(e)) from e
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"OCR processing failed: {e}")
        raise HTTPException(
            status_code=500, detail=f"OCR processing failed: {str(e)}"
        ) from e


@router.get("/status")
async def get_ocr_status():
    """Check if OCR service is available."""
    # Read the flag once so "available" and "message" always agree, even if
    # the attribute is computed on each access.
    available = ocr_service.tesseract_available
    if available:
        message = "OCR service is ready"
    else:
        message = "Tesseract is not installed. OCR functionality is unavailable."
    return {"available": available, "message": message}