ocr-engine / src /controllers /_parser_controller.py
kanha-upadhyay's picture
init
e42e330
raw
history blame
1.84 kB
from fastapi import APIRouter, Body, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from loguru import logger
from pydantic import BaseModel
from src.services import PDFProcessorService
class EntityExtractorSchema(BaseModel):
    """Request body accepted by the entity-extraction endpoint."""

    # Raw text handed to the service's ``extract_entity`` call.
    text: str
class ParserController:
    """HTTP controller exposing PDF parsing and entity extraction routes.

    The controller owns an ``APIRouter`` and registers two POST endpoints
    on it: ``/pdf`` (served by :meth:`parse_pdf`) and ``/entity`` (served
    by :meth:`extract_entity`). Both handlers delegate to a single
    ``PDFProcessorService`` instance created in ``__init__``.
    """

    def __init__(self) -> None:
        """Create the router and the service, then register both routes."""
        self.router = APIRouter()
        self.service = PDFProcessorService()
        self.router.add_api_route("/pdf", self.parse_pdf, methods=["POST"])
        self.router.add_api_route("/entity", self.extract_entity, methods=["POST"])

    @staticmethod
    def _server_error(exc: Exception) -> HTTPException:
        """Log *exc* with its traceback and build the matching 500 error.

        Must be called from inside an ``except`` block so that the logger
        can attach the active traceback. The caller is expected to raise
        the returned exception with ``from exc`` to keep the cause chained.
        """
        logger.exception(exc)
        # NOTE(review): the raw exception message is returned to the client;
        # confirm that exposing internal error text is acceptable here.
        return HTTPException(status_code=500, detail=str(exc))

    async def parse_pdf(self, file: UploadFile = File(...)):
        """Handle ``POST /pdf``: validate the upload and process it.

        Args:
            file: The uploaded file; its content type must be exactly
                ``application/pdf``.

        Returns:
            A ``JSONResponse`` of the form ``{"data": <result>}`` where the
            result is whatever the service's ``process_pdf`` returns.

        Raises:
            HTTPException: 400 when no file is given or the content type is
                not ``application/pdf``; 500 (with the error message as
                detail) for any other failure.
        """
        try:
            if not file:
                raise HTTPException(status_code=400, detail="No file uploaded")
            if file.content_type != "application/pdf":
                raise HTTPException(status_code=400, detail="Invalid file type")

            # NOTE(review): the same service object is entered as an async
            # context manager on every request; confirm it tolerates
            # overlapping requests.
            async with self.service as processor:
                extracted_data = await processor.process_pdf(file)
                return JSONResponse(content={"data": extracted_data})
        except HTTPException:
            # Deliberate HTTP errors (e.g. the 400s above) pass through as-is.
            raise
        except Exception as e:
            raise self._server_error(e) from e

    async def extract_entity(
        self, entity_extractor_schema: EntityExtractorSchema = Body(...)
    ):
        """Handle ``POST /entity``: extract entities from the posted text.

        Args:
            entity_extractor_schema: Request body carrying the ``text`` to
                analyse.

        Returns:
            A ``JSONResponse`` of the form ``{"data": <result>}`` where the
            result is whatever the service's ``extract_entity`` returns.

        Raises:
            HTTPException: 500 (with the error message as detail) when the
                service call fails; an ``HTTPException`` raised by the
                service itself is propagated unchanged.
        """
        try:
            # NOTE(review): unlike parse_pdf, the service is used here without
            # entering its async context; confirm that is intended.
            extracted_entity = await self.service.extract_entity(
                entity_extractor_schema.text
            )
            return JSONResponse(content={"data": extracted_entity})
        except HTTPException:
            raise
        except Exception as e:
            raise self._server_error(e) from e