| | import os |
| | import json |
| | from typing import Dict |
| | from langchain.agents import initialize_agent, AgentType |
| | from langchain_community.tools import Tool, WikipediaQueryRun |
| | from langchain_community.utilities import WikipediaAPIWrapper |
| | from langchain_experimental.tools.python.tool import PythonREPLTool |
| | from langchain_google_genai import ChatGoogleGenerativeAI |
| | import pandas as pd |
| | from pathlib import Path |
| | from docx import Document |
| | import fitz |
| | import requests |
| |
|
class BraveSearchTool:
    """Thin client for the Brave web-search endpoint that reports the top hit.

    The tool is designed to be handed to an agent as a plain ``str -> str``
    callable, so ``run`` never raises: every failure is returned as a string
    prefixed with ``"BraveSearchTool ERROR:"``.
    """

    def __init__(self, api_key: str, timeout: float = 10.0):
        """Store the credentials and request settings.

        Args:
            api_key: Value sent in the ``X-Subscription-Token`` header.
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        self.api_key = api_key
        self.base_url = "https://api.search.brave.com/res/v1/web/search"
        self.timeout = timeout

    def run(self, query: str) -> str:
        """Search for ``query`` and return ``"<title>: <url>"`` of the first result.

        Args:
            query: Free-text search query.

        Returns:
            The first result formatted as ``"<title>: <url>"``,
            ``"No results found."`` when the response contains no web results,
            or a ``"BraveSearchTool ERROR: ..."`` message on any failure
            (including an empty query).
        """
        # A blank query cannot produce a useful answer; skip the network call.
        if not str(query or "").strip():
            return "BraveSearchTool ERROR: empty query."
        try:
            response = requests.get(
                self.base_url,
                headers={"Accept": "application/json", "X-Subscription-Token": self.api_key},
                params={"q": query},
                # requests has no default timeout; without one a stalled
                # connection would block the calling agent forever.
                timeout=self.timeout,
            )
            response.raise_for_status()
            payload = response.json()
            # ``or`` guards against keys that are present but null.
            results = (payload.get("web") or {}).get("results") or []
            if not results:
                return "No results found."
            top = results[0]
            return (top.get("title") or "") + ": " + (top.get("url") or "")
        except Exception as e:
            # Tool boundary: report the failure as text so the agent can react.
            return f"BraveSearchTool ERROR: {str(e)}"
| |
|
class Agent:
    """Question-answering agent: a Gemini chat model driving three LangChain tools.

    Calling an instance with a task dictionary builds a prompt (instructions,
    optional attachment summary, the question), runs the agent on it and
    returns the stripped answer. Only the first entry of ``file_names`` is
    loaded; further attachments are ignored.
    """

    # Attachments are looked up at <FILES_ROOT>/<task_id>/<file_name>.
    FILES_ROOT = "/home/user/app/files"

    # Maximum number of characters of attachment content copied into the
    # prompt, so a large document cannot dominate every agent step.
    MAX_FILE_CHARS = 8000

    SYSTEM_PROMPT = (
        "You are a member of a multidisciplinary research institute, tackling complex and ambiguous problems across knowledge, reasoning, and vision.\n\n"
        "You have access to tools like search engines, calculators, and data analysis environments. Your task is to solve the following question carefully and completely.\n\n"
        "You must:\n"
        "- Think step by step, and write down all reasoning.\n"
        "- If information is missing, use what you know and search if needed.\n"
        "- If you encounter a file, inspect its content and extract relevant information.\n"
        "- Use available tools only when needed, but do not rely on them blindly.\n"
        "- If a tool does not return the final answer, analyze the result and continue reasoning.\n\n"
        "Always:\n"
        "- Confirm that your answer satisfies the constraints (e.g., format, brevity, units).\n"
        "- Answer in one English sentence only, with no explanation.\n"
        "- If the question has a strict required output format, follow it exactly.\n"
        "- Do not end your output until you're confident your answer is final and complete.\n\n"
        "---\n\n"
        "Now solve the following task as best as possible. Do not skip steps. Think hard. Use all your skills and tools. Good luck.\n\n"
    )

    def __init__(self):
        """Read API keys from the environment and assemble the agent.

        Raises:
            ValueError: If ``GEMINI_API_KEY`` or ``BRAVE_SEARCH_API_KEY`` is
                unset or empty.
        """
        gemini_key = os.getenv("GEMINI_API_KEY")
        brave_key = os.getenv("BRAVE_SEARCH_API_KEY")
        if not gemini_key:
            raise ValueError("GEMINI_API_KEY not found in environment variables.")
        if not brave_key:
            raise ValueError("BRAVE_SEARCH_API_KEY not found in environment variables.")

        llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-pro",
            google_api_key=gemini_key,
            convert_system_message_to_human=True,
        )

        tools = [
            Tool(
                name="Wikipedia",
                func=WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()).run,
                description="Useful for general knowledge and encyclopedic questions.",
            ),
            # SECURITY: PythonREPLTool executes whatever code the model emits,
            # with this process's privileges. Only run in a sandboxed
            # environment when questions or attachments are untrusted.
            Tool(
                name="Calculator",
                func=PythonREPLTool().run,
                description="Useful for solving math and logical problems through Python.",
            ),
            Tool(
                name="Brave Search",
                func=BraveSearchTool(api_key=brave_key).run,
                description="Useful for factual and current event queries using Brave search engine.",
            ),
        ]

        self.agent = initialize_agent(
            tools=tools,
            llm=llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            handle_parsing_errors=True,
        )

    def _clip(self, text: str) -> str:
        """Return ``text`` cut to ``MAX_FILE_CHARS``, marking any truncation."""
        limit = self.MAX_FILE_CHARS
        if len(text) <= limit:
            return text
        return text[:limit] + "\n...[truncated]"

    def _summarize_file(self, file_path: str) -> str:
        """Load the attachment at ``file_path`` and describe it for the prompt.

        The loader is chosen by file extension. Tables contribute their shape
        and first three rows; JSON, DOCX and PDF contribute their (clipped)
        content. Loader exceptions propagate to the caller.
        """
        ext = Path(file_path).suffix.lower()

        if ext in (".csv", ".tsv"):
            # TSV is tab-delimited; the default comma separator would read
            # every row as one single column.
            df = pd.read_csv(file_path, sep="\t" if ext == ".tsv" else ",")
            return (
                f"The following table has been loaded with {df.shape[0]} rows and {df.shape[1]} columns:\n"
                f"{df.head(3).to_string(index=False)}"
            )

        if ext == ".xlsx":
            df = pd.read_excel(file_path)
            return (
                f"The following spreadsheet has been loaded with {df.shape[0]} rows and {df.shape[1]} columns:\n"
                f"{df.head(3).to_string(index=False)}"
            )

        if ext in (".json", ".jsonl"):
            with open(file_path, "r", encoding="utf-8") as f:
                if ext == ".jsonl":
                    data = [json.loads(line) for line in f if line.strip()]
                else:
                    data = json.load(f)
            # A JSON document may be a scalar, which has no len().
            count = len(data) if isinstance(data, (list, dict)) else 1
            body = self._clip(json.dumps(data, ensure_ascii=False))
            return f"The following JSON data was loaded ({count} items).\n{body}"

        if ext == ".docx":
            doc = Document(file_path)
            text = "\n".join(para.text for para in doc.paragraphs)
            return f"Extracted text from DOCX ({len(text)} characters).\n{self._clip(text)}"

        if ext == ".pdf":
            # Context manager closes the document once the text is extracted.
            with fitz.open(file_path) as doc:
                page_count = len(doc)
                text = "".join(page.get_text() for page in doc)
            return (
                f"Extracted text from PDF ({page_count} pages, {len(text)} characters).\n"
                f"{self._clip(text)}"
            )

        return "(Unsupported file type — skipping file content.)"

    def __call__(self, input_data: Dict) -> str:
        """Answer one task.

        Args:
            input_data: Mapping read for the keys ``"question"`` (text of the
                task), ``"file_names"`` (attachment names; only the first is
                used) and ``"task_id"`` (sub-directory of ``FILES_ROOT``
                holding the attachments). All keys are optional.

        Returns:
            The agent's answer with surrounding whitespace removed, or
            ``"AGENT ERROR: <message>"`` if loading the attachment or running
            the agent raised.
        """
        question = input_data.get("question", "")
        file_names = input_data.get("file_names", [])
        task_id = input_data.get("task_id", "")

        try:
            file_summary = ""
            if file_names:
                file_path = f"{self.FILES_ROOT}/{task_id}/{file_names[0]}"
                # The path is included so the Python tool can open the full
                # file when the preview below is not enough.
                file_summary = f"Attached file: {file_path}\n" + self._summarize_file(file_path)

            full_prompt = self.SYSTEM_PROMPT + file_summary + f"\n\nTASK:\n{question}"
            result = self.agent.run(full_prompt)
            return result.strip()
        except Exception as e:
            # Top-level boundary: callers expect a string, never an exception.
            return f"AGENT ERROR: {str(e)}"
| |
|