import os

import aiofiles
from fastapi import APIRouter, Body, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from loguru import logger
from pydantic import BaseModel

from src.services import PDFProcessorService
from src.utils import PDFPreprocessor


class EntityExtractorSchema(BaseModel):
    """Request body for the ``/entity`` route: the raw text to analyse."""

    text: str


class ParserController:
    """Expose the PDF-parsing and entity-extraction HTTP routes.

    Registers two POST routes on ``self.router``:

    * ``/pdf``    -> :meth:`parse_pdf`
    * ``/entity`` -> :meth:`extract_entity`

    Both delegate the actual work to a shared ``PDFProcessorService``.
    """

    def __init__(self) -> None:
        self.router = APIRouter()
        self.service = PDFProcessorService()
        self.router.add_api_route("/pdf", self.parse_pdf, methods=["POST"])
        self.router.add_api_route("/entity", self.extract_entity, methods=["POST"])

    @staticmethod
    def _validate_pdf_upload(file: UploadFile) -> None:
        """Reject uploads that do not look like a PDF with a 400 response.

        Raises:
            HTTPException: 400 when the filename is missing or does not end
                in ``.pdf``, or when the declared content type is not
                ``application/pdf``.
        """
        # ``filename`` may be absent on a malformed multipart part; treat that
        # as an invalid upload instead of crashing on ``None.lower()``.
        if not file.filename or not file.filename.lower().endswith(".pdf"):
            raise HTTPException(
                status_code=400,
                detail="Invalid file type. Only PDF files are accepted.",
            )
        if file.content_type != "application/pdf":
            raise HTTPException(
                status_code=400,
                detail="Invalid content type. Only PDF files are accepted.",
            )

    async def parse_pdf(
        self, file: UploadFile = File(...), preprocess: bool = Body(False)
    ):
        """Validate an uploaded PDF, optionally preprocess it, and parse it.

        Args:
            file: The uploaded file; must have a ``.pdf`` name and the
                ``application/pdf`` content type.
            preprocess: When true, the upload is passed through
                ``PDFPreprocessor.preprocess`` before processing.

        Returns:
            A ``JSONResponse`` of the form ``{"data": <extracted data>}``.

        Raises:
            HTTPException: 400 for an invalid upload or a failed preprocess
                step; 500 (with the error text as detail) for any other
                failure.
        """
        # Track the temp file path separately so cleanup in ``finally`` is
        # safe even when we fail before the temp file is ever created.
        temp_path = None
        try:
            self._validate_pdf_upload(file)

            # The filename is client-controlled: keep only its final path
            # component so separators cannot redirect the temp file outside
            # the temp directory.
            safe_name = os.path.basename(file.filename.replace("\\", "/"))

            # NOTE(review): the temp copy is written and later removed, but
            # nothing visible in this module reads it back — confirm whether
            # it is still required.
            async with aiofiles.tempfile.NamedTemporaryFile(
                prefix=safe_name + "_original_", suffix=".pdf", delete=False
            ) as temp_pdf:
                temp_path = temp_pdf.name
                content = await file.read()
                await temp_pdf.write(content)
                await temp_pdf.flush()

            # Rewind so downstream consumers read the upload from the start.
            await file.seek(0)

            if preprocess:
                logger.info("Preprocessing the uploaded PDF file.")
                file = await PDFPreprocessor.preprocess(file=file)
                if file is None:
                    raise HTTPException(
                        status_code=400,
                        detail="Failed to preprocess the PDF file.",
                    )

            async with self.service as processor:
                extracted_data = await processor.process_pdf(file)

            return JSONResponse(content={"data": extracted_data})
        except HTTPException:
            # Deliberate HTTP errors pass through untouched.
            raise
        except Exception as e:
            logger.exception(e)
            raise HTTPException(
                status_code=500,
                detail=str(e),
            ) from e
        finally:
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)

    async def extract_entity(
        self, entity_extractor_schema: EntityExtractorSchema = Body(...)
    ):
        """Run entity extraction on the submitted text.

        Args:
            entity_extractor_schema: Request body carrying the ``text`` to
                pass to ``self.service.extract_entity``.

        Returns:
            A ``JSONResponse`` of the form ``{"data": <extracted entity>}``.

        Raises:
            HTTPException: Re-raised unchanged if the service raises one;
                otherwise 500 with the error text as detail.
        """
        try:
            extracted_entity = await self.service.extract_entity(
                entity_extractor_schema.text
            )
            return JSONResponse(content={"data": extracted_entity})
        except HTTPException:
            raise
        except Exception as e:
            logger.exception(e)
            raise HTTPException(
                status_code=500,
                detail=str(e),
            ) from e