File size: 1,835 Bytes
e42e330
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from fastapi import APIRouter, Body, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from loguru import logger
from pydantic import BaseModel

from src.services import PDFProcessorService


class EntityExtractorSchema(BaseModel):
    """Request body carrying the text to run entity extraction on."""
    # Raw input text; forwarded as-is to the service's extract_entity call.
    text: str


class ParserController:
    """Expose the PDF parsing and entity extraction endpoints.

    On construction this builds an ``APIRouter`` and registers two POST
    routes: ``/pdf`` (handled by :meth:`parse_pdf`) and ``/entity`` (handled
    by :meth:`extract_entity`). Both handlers delegate to one shared
    ``PDFProcessorService`` instance stored on ``self.service``.
    """

    def __init__(self) -> None:
        self.router = APIRouter()
        # NOTE(review): this single service instance is shared by every
        # request and is also entered via ``async with`` in parse_pdf --
        # confirm the service tolerates concurrent / repeated entry.
        self.service = PDFProcessorService()
        self.router.add_api_route("/pdf", self.parse_pdf, methods=["POST"])
        self.router.add_api_route("/entity", self.extract_entity, methods=["POST"])

    @staticmethod
    def _internal_error(exc: Exception) -> HTTPException:
        """Build the 500 ``HTTPException`` used for any unexpected failure."""
        # NOTE(review): str(exc) is sent to the client verbatim, which can
        # expose internal details. Kept unchanged for backward compatibility;
        # consider replacing with a generic message.
        return HTTPException(status_code=500, detail=str(exc))

    async def parse_pdf(self, file: UploadFile = File(...)):
        """Process an uploaded PDF and return the extracted data.

        Args:
            file: The uploaded file; its content type must be
                ``application/pdf``.

        Returns:
            A ``JSONResponse`` whose body is ``{"data": <result of
            process_pdf>}``.

        Raises:
            HTTPException: 400 if no file was supplied or the content type is
                not ``application/pdf``; 500 (with the exception text as
                detail) if processing fails unexpectedly. An
                ``HTTPException`` raised during processing is propagated
                unchanged.
        """
        # Guard clauses live outside the try block: these 400s are deliberate
        # and must never be funnelled through the generic 500 handler.
        if not file:
            raise HTTPException(status_code=400, detail="No file uploaded")
        if file.content_type != "application/pdf":
            raise HTTPException(status_code=400, detail="Invalid file type")

        try:
            async with self.service as processor:
                extracted_data = await processor.process_pdf(file)
            return JSONResponse(content={"data": extracted_data})
        except HTTPException:
            # Already a well-formed HTTP error; let it through untouched.
            raise
        except Exception as e:
            logger.exception(e)
            # Chain the cause so the original failure stays attached.
            raise self._internal_error(e) from e

    async def extract_entity(
        self, entity_extractor_schema: EntityExtractorSchema = Body(...)
    ):
        """Run entity extraction on the text supplied in the request body.

        Args:
            entity_extractor_schema: Request body; its ``text`` field is
                passed to the service's ``extract_entity``.

        Returns:
            A ``JSONResponse`` whose body is ``{"data": <result of
            extract_entity>}``.

        Raises:
            HTTPException: 500 (with the exception text as detail) if
                extraction fails unexpectedly. An ``HTTPException`` raised
                during extraction is propagated unchanged.
        """
        try:
            extracted_entity = await self.service.extract_entity(
                entity_extractor_schema.text
            )
            return JSONResponse(content={"data": extracted_entity})
        except HTTPException:
            # Already a well-formed HTTP error; let it through untouched.
            raise
        except Exception as e:
            logger.exception(e)
            # Chain the cause so the original failure stays attached.
            raise self._internal_error(e) from e