# NOTE: scraped page chrome ("Spaces / Sleeping") removed — this is the app entry module.
| import os | |
| from dotenv import load_dotenv | |
| from fastapi import FastAPI | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi import File, UploadFile | |
| from kor.extraction import create_extraction_chain | |
| from langchain_openai import ChatOpenAI | |
| from langchain_community.document_loaders import PyPDFLoader | |
| import json | |
| from scrape_parse_combine import scrape_parse_combine | |
| import schema_quick_analysis | |
| import schema_indepth_analysis | |
def configure():
    """Load variables from the local .env file into the process environment."""
    load_dotenv()
# Load .env first so os.getenv below can see keys defined there.
configure()

app = FastAPI()

# API key read from the "openai_key" environment variable.
# NOTE(review): will be None if the variable is unset — ChatOpenAI will then
# fail at request time rather than at startup; consider failing fast here.
openai_key = os.getenv("openai_key")

# Shared LLM client used by both extraction chains in this module.
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,
    max_tokens=2000,
    openai_api_key=openai_key
)

# CORS is wide open (any origin, method, header) with credentials allowed.
# NOTE(review): "*" origins combined with allow_credentials=True is only
# honored loosely by browsers — tighten to explicit origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
async def ping():
    """Liveness probe: return a fixed greeting string."""
    greeting = "Hello, I am alive"
    return greeting
# Helper function. Upload a pdf_file and save it
def upload(file):
    """Persist an uploaded file into the current working directory.

    Args:
        file: UploadFile-like object exposing ``.filename`` and a file-like
            ``.file`` supporting ``read()`` and ``close()``.

    Returns:
        The relative path ``"./<name>"`` of the saved file on success, or an
        error dict ``{"message": ...}`` on failure.
        NOTE(review): callers (e.g. quick_analysis) assume a path string; the
        dict error return will break them downstream — consider raising an
        HTTPException instead.
    """
    # basename() strips any directory components from the client-supplied
    # filename, so a malicious name like "../../etc/x" cannot escape CWD.
    safe_name = os.path.basename(file.filename)
    try:
        contents = file.file.read()
        with open(safe_name, 'wb') as f:
            f.write(contents)
    except Exception:
        return {"message": "There was an error uploading the file"}
    finally:
        # Always release the underlying spooled temp file.
        file.file.close()
    return f"./{safe_name}"
async def quick_analysis(file: UploadFile = File(...)):
    """Run the quick extraction pass over an uploaded pitch-deck PDF.

    Saves the upload to disk, concatenates the text of all PDF pages, runs
    the quick-analysis extraction chain over that text, and caches the
    intermediate state in ``pdf_data.json`` for the later in-depth pass.

    Args:
        file: The uploaded PDF.

    Returns:
        ``{"quick_analysis": <chain output dict>}``.
    """
    pdf_path = upload(file)
    loader = PyPDFLoader(pdf_path)
    pages = loader.load_and_split()

    # join() builds the full document text in one pass (avoids quadratic +=).
    doc_info = "".join(page.page_content for page in pages)

    chain = create_extraction_chain(
        llm, schema_quick_analysis.schema, encoder_or_encoder_class="json")
    doc_output = chain.invoke(doc_info)["data"]

    state_dict = {
        "pdf_doc": doc_info,
        "website_url": doc_output["startup_info"]["website_url"],
        # Bug fix: indepth_analysis reads data["startup_info"] from this
        # file, but the key was never written — causing a KeyError there.
        "startup_info": doc_output["startup_info"],
    }

    # Write JSON string to a file (cache for the in-depth endpoint).
    with open('pdf_data.json', 'w') as json_file:
        json.dump(state_dict, json_file)

    return {"quick_analysis": doc_output}
async def indepth_analysis():
    """Run the in-depth extraction pass over previously cached PDF data.

    Scrapes and combines the startup's website content, then feeds the
    cached "startup_info" payload through the in-depth extraction chain.
    NOTE(review): expects pdf_data.json to contain a "startup_info" key —
    verify quick_analysis actually writes it before this endpoint is hit.
    """
    scrape_parse_combine(openai_key)

    # Load JSON data from a file (state cached by quick_analysis).
    with open('pdf_data.json', 'r') as json_file:
        cached_state = json.load(json_file)
    startup_payload = cached_state["startup_info"]

    extraction = create_extraction_chain(
        llm, schema_indepth_analysis.schema, encoder_or_encoder_class="json")
    chain_data = extraction.invoke(startup_payload)["data"]

    return {"indepth_analysis": chain_data["startup_info"]}