# NOTE: scraped from a Hugging Face Space; the page status banner
# ("Spaces: Sleeping") was captured here — it is page chrome, not code.
| import os | |
| import faiss | |
| import numpy as np | |
| from phi.agent import Agent | |
| from phi.tools.firecrawl import FirecrawlTools | |
| from phi.tools.duckduckgo import DuckDuckGo | |
| from phi.model.openai import OpenAIChat | |
| from phi.embedder.openai import OpenAIEmbedder | |
| from typing import List | |
| from pydantic import BaseModel, Field | |
| from fastapi import UploadFile | |
| # Load environment variables (API keys, etc.) | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| ##################################################################################### | |
| # PHASE 1 # | |
| ##################################################################################### | |
| ############################## | |
| # 1️⃣ Company Search Agent # | |
| ############################## | |
# Phase-1 agent: looks up company details on the open web.
# DuckDuckGo search is its only tool; GPT-4o is pinned as the model.
# Instructed to always cite sources; tool calls are surfaced and
# responses are rendered as markdown.
company_search_agent = Agent(
    name="Company Search Agent",
    model=OpenAIChat(id="gpt-4o"),
    tools=[DuckDuckGo()],
    description="Finds company details based on name using web search.",
    instructions=["Always include sources in search results."],
    show_tool_calls=True,
    markdown=True,
)
def search_company(company_name: str):
    """Ask the company-search agent for details about *company_name*.

    Returns the agent's response content (markdown text with source links).
    """
    prompt = (
        f"Find detailed company information for {company_name}. "
        "Extract its official website, mission, services, and any "
        "AI-related initiatives. Prioritize official sources and "
        "provide links where available."
    )
    result = company_search_agent.run(prompt)
    return result.content
| ############################## | |
| # 2️⃣ Website Scraper Agent # | |
| ############################## | |
# Phase-1 agent: scrapes a company's website via Firecrawl (single-page
# scrape; crawling disabled).
# Fix: pin model=OpenAIChat(id="gpt-4o") for consistency with the other
# agents in this file — previously this agent silently fell back to
# phi's default model.
firecrawl_agent = Agent(
    name="Website Scraper Agent",
    model=OpenAIChat(id="gpt-4o"),
    tools=[FirecrawlTools(scrape=True, crawl=False)],
    description="Extracts content from company websites.",
    show_tool_calls=True,
    markdown=True,
)
def scrape_website(url: str):
    """Scrape *url* with the Firecrawl agent and return the extracted content."""
    prompt = (
        f"Extract all relevant business information from {url}, "
        "including mission statement, services, case studies, and "
        "AI-related content. Provide structured output."
    )
    return firecrawl_agent.run(prompt).content
| ############################## | |
| # 3️⃣ Text Processing Agent # | |
| ############################## | |
class CompanySummary(BaseModel):
    """Structured response schema for the text-processing agent."""

    # Free-text condensed company description produced by the LLM.
    summary: str = Field(..., description="Summarized company details based on user input.")
# Phase-1 agent: condenses a user-written company description into a
# CompanySummary (structured output enforced via response_model).
# NOTE(review): unlike the other agents here, this one sets no `name=`
# and no tools — presumably intentional (pure summarization); confirm
# if agent names matter downstream.
text_processing_agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    description="Summarizes user-written company descriptions.",
    response_model=CompanySummary,
)
def process_company_description(text: str):
    """Summarize a raw company description via the text-processing agent."""
    prompt = (
        f"Summarize the following company description: {text}. "
        "Focus on key services, mission, industry, and potential AI "
        "use cases where applicable."
    )
    reply = text_processing_agent.run(prompt)
    return reply.content
| ################################# | |
| # 4️⃣ Document Processing Agent # | |
| ################################# | |
# FAISS index for storing extracted knowledge as vector embeddings.
embedding_model = OpenAIEmbedder(model="text-embedding-3-small")
# 1536 is the output dimension of text-embedding-3-small; it must match
# the embedder's output or faiss_index.add() will reject the vectors.
dimension = 1536  # OpenAI's embedding dimension
# Exact (brute-force) L2-distance index — no training step required.
faiss_index = faiss.IndexFlatL2(dimension)
def process_uploaded_document(file: UploadFile):
    """Persist an uploaded file under tmp/ and return its text content.

    Fixes over the original:
    - tmp/ is created if missing (the original raised FileNotFoundError
      on the first upload when the directory did not exist);
    - the client-supplied filename is reduced to its basename, blocking
      path traversal via names like "../../etc/passwd".

    Returns the file's content decoded as UTF-8.
    Raises UnicodeDecodeError for non-text (binary) uploads — same as
    the original behavior.
    """
    os.makedirs("tmp", exist_ok=True)
    # basename() strips any directory components from untrusted input.
    safe_name = os.path.basename(file.filename)
    file_path = os.path.join("tmp", safe_name)
    with open(file_path, "wb") as buffer:
        buffer.write(file.file.read())
    with open(file_path, "r", encoding="utf-8") as f:
        document_text = f.read()
    # Embedding generation is deliberately deferred to later phases;
    # return the raw text for downstream processing.
    return document_text
| # def process_uploaded_document(file: UploadFile): | |
| # file_path = f"tmp/{file.filename}" | |
| # with open(file_path, "wb") as buffer: | |
| # buffer.write(file.file.read()) | |
| # with open(file_path, "r", encoding="utf-8") as f: | |
| # document_text = f.read() | |
| # # Generate embedding | |
| # embedding = np.array(embedding_model.embed([document_text])).astype("float32") | |
| # faiss_index.add(embedding) | |
| # return f"Document processed and stored in FAISS index: {file.filename}" | |