# model_endpoints/main.py
# (Hugging Face Hub page header removed — was: "karcadan-unicorn's picture /
#  Upload 7 files / c8440e8 verified"; it is not Python and broke the parse.)
import os
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi import File, UploadFile
from kor.extraction import create_extraction_chain
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
import json
from scrape_parse_combine import scrape_parse_combine
import schema_quick_analysis
import schema_indepth_analysis
def configure():
    """Load environment variables from a local .env file into os.environ."""
    load_dotenv()
# Run at import time so the os.getenv("openai_key") lookup below sees .env values.
configure()
app = FastAPI()
# Read the OpenAI key from the environment (populated by configure()/.env above).
openai_key = os.getenv("openai_key")
# Shared LLM used by both extraction endpoints.
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,  # deterministic output for structured extraction
    max_tokens=2000,
    openai_api_key=openai_key
)
# NOTE(review): wide-open CORS (any origin, with credentials) — presumably for
# a demo deployment; tighten allow_origins before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/ping")
async def ping():
    """Liveness probe — confirms the service is up and responding."""
    return "Hello, I am alive"
# Helper function. Save an uploaded file into the working directory.
def upload(file):
    """Persist an uploaded file to disk and return its relative path.

    Args:
        file: A FastAPI ``UploadFile``-like object exposing ``.file``
            (a binary stream) and ``.filename``.

    Returns:
        ``"./<name>"`` for the saved file on success, or a dict with a
        ``"message"`` key on failure (callers must check for this).
    """
    # Use only the base name so a crafted filename (e.g. "../../etc/x")
    # cannot escape the working directory — path-traversal hardening.
    file_name = os.path.basename(file.filename)
    try:
        contents = file.file.read()
        with open(file_name, 'wb') as f:
            f.write(contents)
    except Exception:
        return {"message": "There was an error uploading the file"}
    finally:
        # Always release the upload's underlying stream.
        file.file.close()
    return f"./{file_name}"
@app.post("/quick_analysis")
async def quick_analysis(file: UploadFile = File(...)):
    """Extract key startup info from an uploaded pitch-deck PDF.

    Saves the upload, runs the Kor extraction chain over the PDF text,
    persists state to ``pdf_data.json`` for the follow-up
    ``/indepth_analysis`` call, and returns the extracted data.
    """
    pdf_path = upload(file)
    # upload() returns an error dict on failure; surface it instead of
    # handing a dict to PyPDFLoader (which expects a path string).
    if isinstance(pdf_path, dict):
        return pdf_path
    loader = PyPDFLoader(pdf_path)
    pages = loader.load_and_split()
    # Join page texts in one pass instead of += in a loop.
    doc_info = "".join(page.page_content for page in pages)
    state_dict = {"pdf_doc": doc_info}
    chain = create_extraction_chain(
        llm, schema_quick_analysis.schema, encoder_or_encoder_class="json")
    doc_output = chain.invoke(doc_info)["data"]
    # Guard against the model omitting keys rather than raising KeyError.
    startup_info = doc_output.get("startup_info", {})
    # Persist what /indepth_analysis reads: it expects a "startup_info" key,
    # which the original state dict never contained.
    state_dict["startup_info"] = startup_info
    state_dict["website_url"] = startup_info.get("website_url")
    # Write JSON state to a file for the in-depth analysis step.
    with open('pdf_data.json', 'w') as json_file:
        json.dump(state_dict, json_file)
    return {"quick_analysis": doc_output}
@app.post("/indepth_analysis")
async def indepth_analysis():
    """Run the deeper extraction pass over state saved by /quick_analysis.

    Requires ``pdf_data.json`` written by a prior ``/quick_analysis`` call;
    also triggers a website scrape via scrape_parse_combine.
    """
    # Scrapes the startup's site — presumably reads website_url from
    # pdf_data.json (verify against scrape_parse_combine's implementation).
    scrape_parse_combine(openai_key)
    # Load the state persisted by /quick_analysis.
    with open('pdf_data.json', 'r') as json_file:
        data = json.load(json_file)
    # The original /quick_analysis stores {"pdf_doc": ..., "website_url": ...}
    # with no "startup_info" key, so data["startup_info"] raised KeyError.
    # Fall back to the whole payload when the key is absent.
    result = data.get("startup_info", data)
    chain = create_extraction_chain(
        llm, schema_indepth_analysis.schema, encoder_or_encoder_class="json")
    doc_output = chain.invoke(result)["data"]
    # Guard the nested lookup the same way: return whatever was extracted
    # if the model omitted the "startup_info" wrapper.
    return {"indepth_analysis": doc_output.get("startup_info", doc_output)}