ForestRabbit's picture
Update agent.py
e91020d verified
raw
history blame
4.57 kB
import os
import json
from typing import Dict
from langchain.agents import initialize_agent, AgentType
from langchain_community.tools import Tool, WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_google_genai import ChatGoogleGenerativeAI
import pandas as pd
from pathlib import Path
from docx import Document
import fitz # PyMuPDF
import requests
class BraveSearchTool:
    """Thin client for the Brave web-search endpoint that reports the top hit.

    The tool is meant to be handed to an agent as a plain ``str -> str``
    callable, so ``run`` never raises: failures are reported as a string
    prefixed with ``"BraveSearchTool ERROR: "``.
    """

    def __init__(self, api_key: str, timeout: float = 10.0):
        """Store the credentials and request settings.

        Args:
            api_key: Value sent in the ``X-Subscription-Token`` header.
            timeout: Seconds to wait for the HTTP request before giving up.
                Without a timeout ``requests`` waits indefinitely.
        """
        self.api_key = api_key
        self.base_url = "https://api.search.brave.com/res/v1/web/search"
        self.timeout = timeout

    @staticmethod
    def _format_top_result(payload) -> str:
        """Turn a decoded search response into ``"<title>: <url>"``.

        Only the first entry of ``payload["web"]["results"]`` is used.
        Missing or null ``web`` / ``results`` sections are treated as an
        empty result list.

        Returns:
            ``"<title>: <url>"`` for the first result, or
            ``"No results found."`` when there is none.
        """
        web_section = payload.get("web") if isinstance(payload, dict) else None
        results = (web_section or {}).get("results") or []
        if not results:
            return "No results found."
        top_hit = results[0]
        return top_hit.get("title", "") + ": " + top_hit.get("url", "")

    def run(self, query: str) -> str:
        """Search for ``query`` and return the top hit as text.

        Args:
            query: Search terms, sent as the ``q`` query parameter.

        Returns:
            The formatted top result, ``"No results found."``, or an error
            string starting with ``"BraveSearchTool ERROR: "``.
        """
        try:
            response = requests.get(
                self.base_url,
                headers={
                    "Accept": "application/json",
                    "X-Subscription-Token": self.api_key,
                },
                params={"q": query},
                timeout=self.timeout,
            )
            response.raise_for_status()
            return self._format_top_result(response.json())
        except Exception as e:
            # Tool boundary: the caller expects text back, so every failure
            # (network, HTTP status, malformed JSON) is reported, not raised.
            return f"BraveSearchTool ERROR: {str(e)}"
class Agent:
    """Question-answering agent backed by Gemini plus three tools.

    Calling an instance with a task dict either summarises the task's first
    attached file or, when there is no attachment, runs the question through
    a zero-shot ReAct agent.
    """

    # Root directory for task attachments; files are looked up at
    # <files_dir>/<task_id>/<file_name>. Instances may override it.
    files_dir = "/home/user/app/files"

    def __init__(self, files_dir=None):
        """Build the LLM, the tool list and the agent executor.

        Args:
            files_dir: Optional override for the attachment root directory.
                When omitted the class default is used.

        Raises:
            ValueError: If ``GEMINI_API_KEY`` or ``BRAVE_SEARCH_API_KEY`` is
                not set in the environment.
        """
        if files_dir is not None:
            self.files_dir = files_dir

        gemini_key = os.getenv("GEMINI_API_KEY")
        brave_key = os.getenv("BRAVE_SEARCH_API_KEY")
        if not gemini_key:
            raise ValueError("GEMINI_API_KEY not found in environment variables.")
        if not brave_key:
            raise ValueError("BRAVE_SEARCH_API_KEY not found in environment variables.")

        llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-pro",
            google_api_key=gemini_key,
            convert_system_message_to_human=True,
        )
        tools = [
            Tool(
                name="Wikipedia",
                func=WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()).run,
                description="Useful for general knowledge and encyclopedic questions.",
            ),
            # NOTE(security): PythonREPLTool executes the Python source it is
            # given; here that source is produced by the model. Review before
            # exposing this agent to untrusted questions.
            Tool(
                name="Calculator",
                func=PythonREPLTool().run,
                description="Useful for solving math and logical problems through Python.",
            ),
            Tool(
                name="Brave Search",
                func=BraveSearchTool(api_key=brave_key).run,
                description="Useful for factual and current event queries using Brave search engine.",
            ),
        ]
        self.agent = initialize_agent(
            tools=tools,
            llm=llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            handle_parsing_errors=True,
        )

    @staticmethod
    def _summarize_file(file_path: str) -> str:
        """Load ``file_path`` by extension and describe its size.

        Supported extensions: ``.csv``, ``.tsv``, ``.xlsx``, ``.json``,
        ``.jsonl``, ``.docx`` and ``.pdf`` (matched case-insensitively).

        Returns:
            A one-line summary of what was loaded, or
            ``"Unsupported file type for this task."`` for other extensions.
        """
        ext = Path(file_path).suffix.lower()

        if ext in (".csv", ".tsv"):
            # TSV must be split on tabs; the comma default would collapse
            # every row into a single column.
            separator = "\t" if ext == ".tsv" else ","
            df = pd.read_csv(file_path, sep=separator)
            return f"Loaded table with {df.shape[0]} rows and {df.shape[1]} columns."

        if ext == ".xlsx":
            df = pd.read_excel(file_path)
            return f"Loaded spreadsheet with {df.shape[0]} rows and {df.shape[1]} columns."

        if ext in (".json", ".jsonl"):
            with open(file_path, "r", encoding="utf-8") as f:
                if ext == ".jsonl":
                    # One JSON document per non-blank line.
                    data = [json.loads(line) for line in f if line.strip()]
                else:
                    data = json.load(f)
            return f"Loaded JSON with {len(data)} items."

        if ext == ".docx":
            doc = Document(file_path)
            text = "\n".join([para.text for para in doc.paragraphs])
            return f"Loaded DOCX with {len(text)} characters."

        if ext == ".pdf":
            # Context manager closes the PDF handle even if extraction fails;
            # the page count is captured before the document is closed.
            with fitz.open(file_path) as doc:
                page_count = len(doc)
                text = "".join([page.get_text() for page in doc])
            return f"Loaded PDF with {page_count} pages and {len(text)} characters."

        return "Unsupported file type for this task."

    def __call__(self, input_data: Dict) -> str:
        """Handle one task.

        Args:
            input_data: Task dict. Keys read: ``"question"``,
                ``"file_names"`` and ``"task_id"``; all are optional.

        Returns:
            When ``file_names`` is non-empty, a summary of the first listed
            file (the agent is not invoked in that case). Otherwise the
            agent's stripped answer to ``question``. Any exception is
            reported as a string starting with ``"AGENT ERROR: "``.
        """
        question = input_data.get("question", "")
        file_names = input_data.get("file_names", [])
        task_id = input_data.get("task_id", "")
        try:
            if file_names:
                # Only the first attachment is considered.
                # NOTE(review): this path returns a load summary and never
                # passes the file content to the agent — confirm intent.
                file_path = f"{self.files_dir}/{task_id}/{file_names[0]}"
                return self._summarize_file(file_path)
            result = self.agent.run(question)
            return result.strip()
        except Exception as e:
            return f"AGENT ERROR: {str(e)}"