import json
import os

from dotenv import load_dotenv
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from kor.extraction import create_extraction_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI

import schema_indepth_analysis
import schema_quick_analysis
from scrape_parse_combine import scrape_parse_combine


def configure():
    load_dotenv()


configure()

app = FastAPI()

openai_key = os.getenv("openai_key")

llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,
    max_tokens=2000,
    openai_api_key=openai_key,
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/ping")
async def ping():
    return "Hello, I am alive"


# Helper function. Save an uploaded PDF to the working directory and return its path.
def upload(file: UploadFile) -> str:
    try:
        contents = file.file.read()
        with open(file.filename, "wb") as f:
            f.write(contents)
    except Exception:
        # Raise instead of returning a dict, so callers never treat an
        # error message as a file path.
        raise HTTPException(
            status_code=500, detail="There was an error uploading the file"
        )
    finally:
        file.file.close()
    return f"./{file.filename}"


@app.post("/quick_analysis")
async def quick_analysis(file: UploadFile = File(...)):
    state_dict = {}

    # Save the uploaded pitch deck and concatenate its pages into one string.
    pdf_path = upload(file)
    loader = PyPDFLoader(pdf_path)
    pages = loader.load_and_split()
    doc_info = "".join(page.page_content for page in pages)
    state_dict["pdf_doc"] = doc_info

    # Run the Kor extraction chain against the quick-analysis schema.
    chain = create_extraction_chain(
        llm, schema_quick_analysis.schema, encoder_or_encoder_class="json"
    )
    doc_output = chain.invoke(doc_info)["data"]
    state_dict["website_url"] = doc_output["startup_info"]["website_url"]

    # Persist the document text and website URL for the in-depth analysis step.
    with open("pdf_data.json", "w") as json_file:
        json.dump(state_dict, json_file)

    return {"quick_analysis": doc_output}


@app.post("/indepth_analysis")
async def indepth_analysis():
    # Scrape and parse the startup's website; expected to update pdf_data.json
    # with a combined "startup_info" entry.
    scrape_parse_combine(openai_key)

    # Load the combined JSON data from the file.
    with open("pdf_data.json", "r") as json_file:
        data = json.load(json_file)
    result = data["startup_info"]

    # Run the Kor extraction chain against the in-depth schema.
    chain = create_extraction_chain(
        llm, schema_indepth_analysis.schema, encoder_or_encoder_class="json"
    )
    doc_output = chain.invoke(result)["data"]

    return {"indepth_analysis": doc_output["startup_info"]}
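

# A minimal sketch for running the app directly. The module name "main",
# the host, and the port below are illustrative assumptions, not taken
# from the original source; the usual alternative is:
#
#   uvicorn main:app --reload
#
# Example requests (pitch_deck.pdf is a hypothetical file name):
#
#   curl -X POST -F "file=@pitch_deck.pdf" http://localhost:8000/quick_analysis
#   curl -X POST http://localhost:8000/indepth_analysis
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)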