File size: 2,673 Bytes
206ef5f
37a2681
206ef5f
37a2681
206ef5f
 
 
 
 
 
 
 
 
 
 
 
37a2681
206ef5f
37a2681
206ef5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37a2681
206ef5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4689e89
206ef5f
4689e89
206ef5f
 
 
 
 
 
 
 
37a2681
 
206ef5f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from fastapi import FastAPI, UploadFile, HTTPException

import asyncio

from utils.PdfUtils import ProcessPdf
from utils.HelperFunctions import (
    generate_file_id,
    save_to_database,
    retrieve_from_database,
)
from utils.VectorDatabase import AdvancedClient
from utils.ModelCallingFunctions import (
    industry_finder,
    other_info,
    business_information,
)

# ASGI application object; the route handlers below register themselves on it.
app = FastAPI()

# Module-level vector-store client shared by all requests.
# NOTE(review): "VectorDB" is presumably the storage path / database name
# expected by AdvancedClient — confirm against utils.VectorDatabase.
client = AdvancedClient("VectorDB")


async def _build_analysis(pdf_id, file_name):
    """Run the extraction pipeline for an already indexed deck and store it.

    Args:
        pdf_id: Identifier of the vector-store collection holding the deck.
        file_name: Original upload name, recorded under ``"pitch-deck"``.

    Returns:
        dict: The analysis with ``"industry"``, ``"other_info"`` and
        ``"business_info"`` keys, exactly as persisted via
        ``save_to_database``.
    """
    # NOTE(review): industry_finder is called without ``await``, so it runs
    # synchronously inside this coroutine — confirm it is cheap enough for
    # the event loop.
    industry_info = industry_finder(collection_id=pdf_id)
    industry_info["pitch-deck"] = file_name

    # ``other_info`` needs the industry result, ``business_information`` only
    # needs the collection id; neither consumes the other's output, so they
    # are awaited concurrently instead of back to back.
    other_info_results, business_info = await asyncio.gather(
        other_info(company_data=industry_info),
        business_information(collection_id=pdf_id),
    )

    analysis = {
        "industry": industry_info,
        "other_info": other_info_results,
        "business_info": business_info,
    }
    save_to_database(_id=pdf_id, data=analysis)
    return analysis


@app.post(
    "/get_analysis",
    responses={
        200: {
            "description": "Successful Response",
            "content": {
                "application/json": {
                    "example": {
                        "industry": {
                            "pitch-deck": "File Name",
                            "industry": "XYZ",
                            "niche": "ABC",
                        },
                        "other_info": {
                            "Risk Involved": "Markdown",
                            "Barrier To Entry": "Markdown",
                            "Competitors": "Markdown",
                            "Challenges": "Markdown",
                        },
                        "business_info": {
                            "product-and-market": "{...}",
                            "team-and-strategy": "{...}",
                            "financials": "{...}",
                        },
                    }
                }
            },
        }
    },
)
async def get_analysis(pdf_file: UploadFile):
    """Analyse an uploaded pitch-deck PDF, reusing a stored result if present.

    The upload is identified by ``generate_file_id(pdf_content)``. When a
    vector-store collection with that id already exists, the stored analysis
    is returned; otherwise the PDF is indexed and the analysis is computed
    and saved.

    Args:
        pdf_file: The uploaded pitch-deck PDF.

    Returns:
        dict: Analysis with ``"industry"``, ``"other_info"`` and
        ``"business_info"`` sections.

    Raises:
        HTTPException: 400 when no file, or an empty file, is supplied.
    """
    if not pdf_file:
        raise HTTPException(status_code=400, detail="Pitch PDF file not provided")

    pdf_content = await pdf_file.read()
    if not pdf_content:
        # A zero-byte upload cannot be analysed; reject it before indexing.
        raise HTTPException(status_code=400, detail="Pitch PDF file is empty")

    pdf_id = generate_file_id(pdf_content)
    file_name = pdf_file.filename

    existing_ids = {
        collection.name for collection in client.client.list_collections()
    }
    if pdf_id in existing_ids:
        # Collection already indexed: prefer the stored analysis.
        stored = retrieve_from_database(_id=pdf_id)
        if stored:
            return stored
        # The collection exists but no analysis was stored (e.g. an earlier
        # request failed after indexing). Recompute instead of returning an
        # empty response.
        # NOTE(review): assumes retrieve_from_database yields a falsy value
        # for a missing record — confirm against utils.HelperFunctions.
    else:
        pdf_chunks = ProcessPdf(pdf_content=pdf_content)
        client.create_collection(collection_id=pdf_id, file_datas=pdf_chunks)

    return await _build_analysis(pdf_id, file_name)