Spaces:

baveshraam
/

open-notebook

Sleeping

App Files Files Community

open-notebook / api /routers /ocr.py

baveshraam

FIX: SurrealDB 2.0 migration syntax and Frontend/CORS link

f871fed 6 days ago

raw

history blame contribute delete

4.35 kB

	"""
	OCR API Router

	Endpoints for image processing and text extraction.
	"""

	from typing import Optional
	from fastapi import APIRouter, HTTPException, UploadFile, File, Form
	from pydantic import BaseModel
	from loguru import logger

	from open_notebook.services.ocr_service import ocr_service, OCRResult, StructuredNote


	# Router that groups every endpoint in this module under the "/ocr" prefix.
	router = APIRouter(
	    prefix="/ocr",
	    tags=["ocr"],
	)


	# ============================================================================
	# Request/Response Models
	# ============================================================================

	class OCRBase64Request(BaseModel):
	    """Request body for running OCR on a base64 encoded image."""

	    # The image to process, base64 encoded.
	    image: str
	    # Whether the extracted text should also be structured (on by default).
	    structure: bool = True


	class OCRResponse(BaseModel):
	    """Response body returned by the OCR processing endpoints."""

	    # Text extracted from the image.
	    raw_text: str
	    # Confidence reported by the OCR result; may be absent.
	    confidence: Optional[float] = None
	    # Processing duration; presumably milliseconds, going by the field name.
	    processing_time_ms: int
	    # Source format as reported by the OCR result.
	    source_format: str
	    # Structured form of the text; None when structuring was skipped or failed.
	    structured: Optional[StructuredNote] = None


	# ============================================================================
	# Endpoints
	# ============================================================================

	@router.post("/process", response_model=OCRResponse)
	async def process_image(request: OCRBase64Request):
	    """
	    Process a base64 encoded image and extract text using OCR.

	    Supports PNG, JPEG, and other common image formats.
	    Optionally structures the extracted text using LLM.

	    Args:
	        request: Base64 image payload plus the ``structure`` flag.

	    Returns:
	        An ``OCRResponse`` built from the OCR result. ``structured`` stays
	        ``None`` when structuring is not requested, when no text was
	        extracted, or when the structuring step fails (the failure is only
	        logged as a warning).

	    Raises:
	        HTTPException: 503 when the OCR call raises ``RuntimeError``; 500 for
	            any other error.
	    """
	    # Local import keeps this handler self-contained; FastAPI is already a
	    # dependency of this module.
	    from fastapi.concurrency import run_in_threadpool

	    try:
	        # The OCR call is synchronous. Running it in the threadpool keeps the
	        # event loop free to serve other requests while the image is processed.
	        result = await run_in_threadpool(
	            ocr_service.process_image_base64, request.image
	        )

	        # Structuring is best-effort: a failure must not discard the raw text.
	        structured = None
	        if request.structure and result.raw_text:
	            try:
	                structured = await ocr_service.structure_text(result.raw_text)
	            except Exception as e:
	                logger.warning(f"Failed to structure OCR text: {e}")

	        return OCRResponse(
	            raw_text=result.raw_text,
	            confidence=result.confidence,
	            processing_time_ms=result.processing_time_ms,
	            source_format=result.source_format,
	            structured=structured,
	        )

	    except RuntimeError as e:
	        # RuntimeError from the OCR service is reported as "service unavailable".
	        raise HTTPException(status_code=503, detail=str(e)) from e
	    except Exception as e:
	        # logger.exception records the traceback alongside the message.
	        logger.exception(f"OCR processing failed: {e}")
	        raise HTTPException(
	            status_code=500, detail=f"OCR processing failed: {str(e)}"
	        ) from e


	@router.post("/upload", response_model=OCRResponse)
	async def process_uploaded_image(
	    file: UploadFile = File(...),
	    structure: bool = Form(True),
	):
	    """
	    Process an uploaded image file and extract text using OCR.

	    Supports PNG, JPEG, and other common image formats.

	    Args:
	        file: The uploaded image (multipart form field).
	        structure: Whether the extracted text should also be structured.

	    Returns:
	        An ``OCRResponse`` built from the OCR result. ``structured`` stays
	        ``None`` when structuring is not requested, when no text was
	        extracted, or when the structuring step fails (the failure is only
	        logged as a warning).

	    Raises:
	        HTTPException: 400 for a disallowed content type or an empty upload;
	            503 when the OCR call raises ``RuntimeError``; 500 for any other
	            error.
	    """
	    # Local import keeps this handler self-contained; FastAPI is already a
	    # dependency of this module.
	    from fastapi.concurrency import run_in_threadpool

	    # Validate file type
	    allowed_types = (
	        "image/png",
	        "image/jpeg",
	        "image/jpg",
	        "image/gif",
	        "image/webp",
	        "image/bmp",
	    )
	    # The header may be missing, differently cased, or carry parameters
	    # (e.g. "image/png; charset=binary"); compare only the bare media type.
	    media_type = (file.content_type or "").split(";", 1)[0].strip().lower()
	    if media_type not in allowed_types:
	        raise HTTPException(
	            status_code=400,
	            detail=f"Invalid file type. Allowed types: {', '.join(allowed_types)}",
	        )

	    try:
	        # Read file
	        contents = await file.read()
	        if not contents:
	            # An empty upload is a client error, not an OCR failure.
	            raise HTTPException(status_code=400, detail="Uploaded file is empty.")

	        # The OCR call is synchronous. Running it in the threadpool keeps the
	        # event loop free to serve other requests while the image is processed.
	        result = await run_in_threadpool(ocr_service.process_image_bytes, contents)

	        # Structuring is best-effort: a failure must not discard the raw text.
	        structured = None
	        if structure and result.raw_text:
	            try:
	                structured = await ocr_service.structure_text(result.raw_text)
	            except Exception as e:
	                logger.warning(f"Failed to structure OCR text: {e}")

	        return OCRResponse(
	            raw_text=result.raw_text,
	            confidence=result.confidence,
	            processing_time_ms=result.processing_time_ms,
	            source_format=result.source_format,
	            structured=structured,
	        )

	    except HTTPException:
	        # Let deliberate HTTP errors through instead of rewrapping them as 500.
	        raise
	    except RuntimeError as e:
	        # RuntimeError from the OCR service is reported as "service unavailable".
	        raise HTTPException(status_code=503, detail=str(e)) from e
	    except Exception as e:
	        # logger.exception records the traceback alongside the message.
	        logger.exception(f"OCR processing failed: {e}")
	        raise HTTPException(
	            status_code=500, detail=f"OCR processing failed: {str(e)}"
	        ) from e


	@router.get("/status")
	async def get_ocr_status():
	    """Report whether the OCR service can currently be used."""
	    status = {"available": ocr_service.tesseract_available}
	    if ocr_service.tesseract_available:
	        status["message"] = "OCR service is ready"
	    else:
	        status["message"] = (
	            "Tesseract is not installed. OCR functionality is unavailable."
	        )
	    return status