"""Phase 1 agents: company search, website scraping, text summarization,
and document ingestion for a FAISS-backed knowledge store.

Fixes relative to the previous revision:
- `scrape_website`'s prompt string was broken across a raw newline
  (an unterminated string literal, i.e. a SyntaxError); it is now a
  single well-formed f-string with the same text.
- `process_uploaded_document` now creates the `tmp/` directory before
  writing, so the first upload on a fresh checkout no longer raises
  FileNotFoundError.
- Removed the commented-out duplicate of `process_uploaded_document`.
"""

import os

import faiss
import numpy as np
from phi.agent import Agent
from phi.tools.firecrawl import FirecrawlTools
from phi.tools.duckduckgo import DuckDuckGo
from phi.model.openai import OpenAIChat
from phi.embedder.openai import OpenAIEmbedder
from typing import List
from pydantic import BaseModel, Field
from fastapi import UploadFile

# Load environment variables (API keys, etc.)
from dotenv import load_dotenv

load_dotenv()

#####################################################################################
#                                    PHASE 1                                        #
#####################################################################################

##############################
#  1. Company Search Agent   #
##############################

company_search_agent = Agent(
    name="Company Search Agent",
    model=OpenAIChat(id="gpt-4o"),
    tools=[DuckDuckGo()],
    description="Finds company details based on name using web search.",
    instructions=["Always include sources in search results."],
    show_tool_calls=True,
    markdown=True,
)


def search_company(company_name: str):
    """Search the web for details about *company_name*.

    Args:
        company_name: The name of the company to look up.

    Returns:
        The agent's response content (markdown text with sources).
    """
    query = f"Find detailed company information for {company_name}. Extract its official website, mission, services, and any AI-related initiatives. Prioritize official sources and provide links where available."
    response = company_search_agent.run(query)
    return response.content


##############################
#  2. Website Scraper Agent  #
##############################

firecrawl_agent = Agent(
    name="Website Scraper Agent",
    tools=[FirecrawlTools(scrape=True, crawl=False)],
    description="Extracts content from company websites.",
    show_tool_calls=True,
    markdown=True,
)


def scrape_website(url: str):
    """Scrape *url* via Firecrawl and extract structured business info.

    Args:
        url: The website URL to scrape.

    Returns:
        The agent's response content (structured markdown text).
    """
    # NOTE: this prompt was previously split across a literal newline,
    # which made the string (and the whole module) a syntax error.
    response = firecrawl_agent.run(
        f"Extract all relevant business information from {url}, including mission statement, services, case studies, and AI-related content. Provide structured output."
    )
    return response.content


##############################
#  3. Text Processing Agent  #
##############################

class CompanySummary(BaseModel):
    summary: str = Field(..., description="Summarized company details based on user input.")


text_processing_agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    description="Summarizes user-written company descriptions.",
    response_model=CompanySummary,
)


def process_company_description(text: str):
    """Summarize a user-written company description.

    Args:
        text: Free-form company description.

    Returns:
        The agent's response content (a `CompanySummary`-shaped result).
    """
    response = text_processing_agent.run(
        f"Summarize the following company description: {text}. Focus on key services, mission, industry, and potential AI use cases where applicable."
    )
    return response.content


#################################
#  4. Document Processing Agent #
#################################

# FAISS index for storing extracted knowledge.
embedding_model = OpenAIEmbedder(model="text-embedding-3-small")
dimension = 1536  # OpenAI's embedding dimension for text-embedding-3-small
faiss_index = faiss.IndexFlatL2(dimension)


def process_uploaded_document(file: UploadFile):
    """Persist an uploaded file to `tmp/` and return its text content.

    The raw content is returned for use by later phases; embedding into
    `faiss_index` is deliberately deferred.

    Args:
        file: The FastAPI upload to process.

    Returns:
        The document's full text.
    """
    # Ensure the scratch directory exists; writing into a missing
    # directory raised FileNotFoundError on a fresh checkout.
    os.makedirs("tmp", exist_ok=True)

    file_path = f"tmp/{file.filename}"
    with open(file_path, "wb") as buffer:
        buffer.write(file.file.read())

    # NOTE(review): assumes the upload is UTF-8 text; binary uploads
    # (e.g. PDFs) would raise UnicodeDecodeError here — confirm callers.
    with open(file_path, "r", encoding="utf-8") as f:
        document_text = f.read()

    # Optionally an embedding could be generated here, but the raw
    # content is returned for further phases.
    return document_text