# NOTE: the following lines are not Python; commented out so the module parses.
# Tingusto's picture
# Initialize agent
# 5de5d19
# raw
# history blame
# 8.53 kB
import os
from typing import Dict, List, Optional
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
import json
import requests
from bs4 import BeautifulSoup
import urllib.parse
import pandas as pd
import re
# Runs at import time, before any BasicAgent is constructed: loads variables
# from a .env file (when one is found) into the process environment.
load_dotenv()
class BasicAgent:
    """Question-answering agent built on a Groq-hosted chat model.

    Calling an instance performs two LLM round-trips: the first classifies
    the question (type, required format, key terms), the second produces the
    answer. The answer is then post-processed according to the detected
    question type.
    """

    # Maximum number of hits returned by web_search.
    _MAX_WEB_RESULTS = 3
    # Seconds to wait on the search endpoint; without a timeout a stalled
    # connection would block the agent forever.
    _HTTP_TIMEOUT_SECONDS = 10
    # First plain decimal number (ASCII digits only, optional sign/fraction).
    _NUMBER_PATTERN = r'-?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)'
    # One move in standard algebraic notation: castling, or an optional piece
    # letter, optional disambiguation, optional capture, target square,
    # optional promotion; followed by an optional check/mate marker.
    _SAN_PATTERN = (
        r'(?:O-O-O|O-O|[KQRBN]?[a-h]?[1-8]?x?[a-h][1-8](?:=[QRBN])?)[+#]?'
    )

    def __init__(self):
        """Create the chat model client, the system prompt and the tool list."""
        self.llm = ChatGroq(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            temperature=0.1
        )
        self.system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.
Key Rules:
1. Answer Format:
- For numbers: Provide only the number without units, commas, or formatting
- For text: Use minimal words, no articles or abbreviations
- For lists: Use comma-separated values without additional formatting
- For dates: Use YYYY-MM-DD format unless specified otherwise
- For names: Use full names without titles or honorifics
- For country codes: Use official IOC codes (3 letters)
- For chess moves: Use standard algebraic notation
- For currency: Use numbers only, no symbols
2. Answer Guidelines:
- Be extremely precise and direct
- Do not include any explanatory text
- Do not use phrases like "FINAL ANSWER" or any markers
- Do not include units unless explicitly requested
- Do not use abbreviations unless they are standard (e.g., DNA, RNA)
- For multiple choice: Provide only the letter or number of the correct answer
- For reversed text: Provide the answer in normal text
- For file-based questions: Focus on the specific information requested
3. Error Handling:
- If uncertain, provide the most likely answer based on available information
- If completely unsure, provide a reasonable default rather than an error message
- For file processing errors, indicate the specific issue
4. Special Cases:
- For mathematical questions: Provide the exact numerical result
- For historical dates: Use the most widely accepted date
- For scientific terms: Use the standard scientific notation
- For geographical locations: Use official names without abbreviations
- For audio/video questions: Focus on the specific detail requested"""
        # Initialize tools
        # NOTE(review): these callables are only registered here; nothing in
        # this class invokes them while answering a question.
        self.tools = [
            self.wiki_search,
            self.web_search,
            self.arxiv_search
        ]

    def wiki_search(self, query: str) -> str:
        """Search Wikipedia for information.

        Args:
            query: Search terms passed to the Wikipedia loader.

        Returns:
            Page contents of up to two documents joined by newlines, or an
            "Error searching Wikipedia: ..." message if loading fails.
        """
        try:
            search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
            return "\n".join(doc.page_content for doc in search_docs)
        except Exception as e:
            return f"Error searching Wikipedia: {str(e)}"

    def web_search(self, query: str) -> str:
        """Search the web using DuckDuckGo.

        Args:
            query: Free-text search query; it is URL-encoded before sending.

        Returns:
            Up to three "Title/Snippet" entries separated by blank lines,
            "No results found" when nothing could be parsed, or an
            "Error searching web: ..." message on any failure.
        """
        try:
            encoded_query = urllib.parse.quote(query)
            url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(
                url, headers=headers, timeout=self._HTTP_TIMEOUT_SECONDS
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            results = []
            for result in soup.find_all('div', class_='result__body'):
                title = result.find('h2', class_='result__title')
                snippet = result.find('a', class_='result__snippet')
                if not (title and snippet):
                    continue
                # Trim only the outer whitespace; inner spacing between
                # highlighted fragments must be preserved.
                title_text = title.get_text().strip()
                snippet_text = snippet.get_text().strip()
                results.append(f"Title: {title_text}\nSnippet: {snippet_text}")
                if len(results) >= self._MAX_WEB_RESULTS:
                    break
            return "\n\n".join(results) if results else "No results found"
        except Exception as e:
            return f"Error searching web: {str(e)}"

    def arxiv_search(self, query: str) -> str:
        """Search Arxiv for scientific papers.

        Args:
            query: Search terms passed to the Arxiv loader.

        Returns:
            The first 1000 characters of up to two documents joined by
            newlines, or an "Error searching Arxiv: ..." message on failure.
        """
        try:
            search_docs = ArxivLoader(query=query, load_max_docs=2).load()
            return "\n".join(doc.page_content[:1000] for doc in search_docs)
        except Exception as e:
            return f"Error searching Arxiv: {str(e)}"

    def process_file(self, file_name: str, question: str) -> str:
        """Process different types of files based on extension.

        Args:
            file_name: Path of the attached file; may be empty.
            question: The question being answered (currently unused here).

        Returns:
            A short status string describing the file, or an
            "Error processing file: ..." message if reading it fails.
        """
        try:
            if not file_name:
                return "No file provided"
            # splitext looks only at the final path component, so dots in
            # directory names are not mistaken for an extension.
            file_ext = os.path.splitext(file_name)[1].lstrip('.').lower()
            if file_ext == 'xlsx':
                df = pd.read_excel(file_name)
                return f"Excel file loaded with {len(df)} rows"
            if file_ext == 'mp3':
                return "Audio file detected - requires speech processing"
            if file_ext == 'png':
                return "Image file detected - requires image processing"
            if file_ext == 'py':
                # Explicit encoding: do not depend on the platform locale.
                with open(file_name, 'r', encoding='utf-8') as f:
                    code = f.read()
                return f"Python code loaded: {len(code)} characters"
            return f"Unsupported file type: {file_ext}"
        except Exception as e:
            return f"Error processing file: {str(e)}"

    @staticmethod
    def _parse_analysis(raw, question, file_name):
        """Turn the analyzer's reply into a dict that always has every key.

        Falls back to defaults when the reply is not a JSON object; keys the
        model omitted are filled in from the same defaults.
        """
        fallback = {
            "question_type": "text",
            "required_format": "direct",
            "key_terms": question,
            "file_processing_needed": bool(file_name)
        }
        if not isinstance(raw, str):
            return fallback
        # Models often wrap the JSON in markdown fences or prose, so parse
        # the outermost {...} span when one exists.
        start = raw.find('{')
        end = raw.rfind('}')
        candidate = raw[start:end + 1] if start != -1 and end > start else raw
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return fallback
        if not isinstance(parsed, dict):
            return fallback
        return {**fallback, **parsed}

    @staticmethod
    def _extract_number(answer):
        """Return the first number in *answer* without thousands separators.

        If no number is present the text is returned unchanged rather than
        being reduced to an empty string.
        """
        match = re.search(BasicAgent._NUMBER_PATTERN, answer.replace(',', ''))
        return match.group(0) if match else answer

    @staticmethod
    def _normalize_answer(answer, question_type):
        """Strip answer markers and apply type-specific output formatting."""
        answer = answer.strip()
        marker = "final answer:"
        if answer.lower().startswith(marker):
            answer = answer[len(marker):].strip()
        kind = str(question_type).strip().lower()
        if kind == 'number':
            return BasicAgent._extract_number(answer)
        if kind == 'list':
            return ','.join(item.strip() for item in answer.split(','))
        if kind == 'country_code':
            return answer[:3].upper()
        if kind == 'chess_move':
            move = re.search(BasicAgent._SAN_PATTERN, answer)
            if move:
                return move.group(0)
            # No recognizable move: keep only notation characters, including
            # piece letters and castling, which the notation requires.
            return re.sub(r'[^a-hKQRBNO1-8x+=#-]', '', answer)
        return answer

    def __call__(self, question: str, file_name: Optional[str] = None) -> str:
        """Answer *question*, optionally taking an attached file into account.

        Args:
            question: The question text. A question starting with '.' is
                treated as reversed text and is reversed before processing.
            file_name: Optional path of a file that accompanies the question.

        Returns:
            The post-processed answer, or an "Error processing question: ..."
            message if any step raises.
        """
        try:
            if question.startswith('.'):
                question = question[::-1]
            file_info = ""
            if file_name:
                file_info = self.process_file(file_name, question)
            analysis_prompt = f"""Analyze this question and determine its type and required format:
Question: {question}
File Info: {file_info}
Provide a JSON response with:
1. question_type: (number/text/list/date/name/multiple_choice/file_processing)
2. required_format: (specific format requirements)
3. key_terms: (important terms to search for)
4. file_processing_needed: (true/false)"""
            analysis_messages = [
                SystemMessage(content="You are a question analyzer. Provide a JSON response."),
                HumanMessage(content=analysis_prompt)
            ]
            analysis = self.llm.invoke(analysis_messages)
            analysis_data = self._parse_analysis(
                analysis.content, question, file_name
            )
            messages = [
                SystemMessage(content=self.system_prompt),
                HumanMessage(content=f"""Question Type: {analysis_data['question_type']}
Required Format: {analysis_data['required_format']}
Key Terms: {analysis_data['key_terms']}
File Processing: {analysis_data.get('file_processing_needed', False)}
Question: {question}""")
            ]
            response = self.llm.invoke(messages)
            return self._normalize_answer(
                response.content, analysis_data['question_type']
            )
        except Exception as e:
            print(f"Error in agent response: {e}")
            return f"Error processing question: {str(e)}"