import json
import os
import re
import urllib.parse
from typing import Any, Dict, List, Optional

import pandas as pd
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from langchain_community.document_loaders import ArxivLoader
from langchain_community.document_loaders import WikipediaLoader
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_groq import ChatGroq

load_dotenv()

# System prompt sent with every answering request.
_SYSTEM_PROMPT = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.

Key Rules:
1. Answer Format:
   - For numbers: Provide only the number without units, commas, or formatting
   - For text: Use minimal words, no articles or abbreviations
   - For lists: Use comma-separated values without additional formatting
   - For dates: Use YYYY-MM-DD format unless specified otherwise
   - For names: Use full names without titles or honorifics
   - For country codes: Use official IOC codes (3 letters)
   - For chess moves: Use standard algebraic notation
   - For currency: Use numbers only, no symbols

2. Answer Guidelines:
   - Be extremely precise and direct
   - Do not include any explanatory text
   - Do not use phrases like "FINAL ANSWER" or any markers
   - Do not include units unless explicitly requested
   - Do not use abbreviations unless they are standard (e.g., DNA, RNA)
   - For multiple choice: Provide only the letter or number of the correct answer
   - For reversed text: Provide the answer in normal text
   - For file-based questions: Focus on the specific information requested

3. Error Handling:
   - If uncertain, provide the most likely answer based on available information
   - If completely unsure, provide a reasonable default rather than an error message
   - For file processing errors, indicate the specific issue

4. Special Cases:
   - For mathematical questions: Provide the exact numerical result
   - For historical dates: Use the most widely accepted date
   - For scientific terms: Use the standard scientific notation
   - For geographical locations: Use official names without abbreviations
   - For audio/video questions: Focus on the specific detail requested"""

# First signed decimal number in a string (thousands separators are removed
# before this pattern is applied).
_NUMBER_RE = re.compile(r'-?(?:\d+(?:\.\d+)?|\.\d+)')

# Everything that is NOT part of algebraic chess notation: files a-h, ranks
# 1-8, piece letters K/Q/R/B/N, castling "O-O", capture "x", check "+",
# promotion "=", mate "#".
_NON_CHESS_RE = re.compile(r'[^a-hKQRBNO1-8x+=#-]')


class BasicAgent:
    """Question-answering agent backed by a Groq-hosted chat model.

    Calling an instance runs two LLM round trips: one that classifies the
    question (type, required format, key terms) and one that answers it
    under ``system_prompt``. The answer is then normalised according to the
    detected question type.
    """

    # Seconds to wait for the DuckDuckGo HTML endpoint before giving up.
    _REQUEST_TIMEOUT = 15

    def __init__(self):
        self.llm = ChatGroq(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            temperature=0.1
        )
        self.system_prompt = _SYSTEM_PROMPT

        # Initialize tools
        self.tools = [
            self.wiki_search,
            self.web_search,
            self.arxiv_search
        ]

    def wiki_search(self, query: str) -> str:
        """Search Wikipedia for information.

        Returns the page contents of up to two matching documents joined by
        newlines, or an ``"Error searching Wikipedia: ..."`` string if the
        loader raises.
        """
        try:
            search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
            return "\n".join([doc.page_content for doc in search_docs])
        except Exception as e:
            return f"Error searching Wikipedia: {str(e)}"

    def web_search(self, query: str) -> str:
        """Search the web using DuckDuckGo.

        Scrapes the DuckDuckGo HTML endpoint and returns up to three
        ``Title``/``Snippet`` pairs separated by blank lines, ``"No results
        found"`` when nothing matched, or an ``"Error searching web: ..."``
        string on any failure (including a timeout).
        """
        try:
            encoded_query = urllib.parse.quote(query)
            url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            # Without a timeout requests waits indefinitely on a stalled
            # connection, which would hang the whole agent.
            response = requests.get(
                url, headers=headers, timeout=self._REQUEST_TIMEOUT
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')
            results: List[str] = []
            for result in soup.find_all('div', class_='result__body'):
                title = result.find('h2', class_='result__title')
                snippet = result.find('a', class_='result__snippet')
                if title and snippet:
                    results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")
                if len(results) >= 3:
                    break
            return "\n\n".join(results) if results else "No results found"
        except Exception as e:
            return f"Error searching web: {str(e)}"

    def arxiv_search(self, query: str) -> str:
        """Search Arxiv for scientific papers.

        Returns the first 1000 characters of up to two matching documents
        joined by newlines, or an ``"Error searching Arxiv: ..."`` string if
        the loader raises.
        """
        try:
            search_docs = ArxivLoader(query=query, load_max_docs=2).load()
            return "\n".join([doc.page_content[:1000] for doc in search_docs])
        except Exception as e:
            return f"Error searching Arxiv: {str(e)}"

    def process_file(self, file_name: str, question: str) -> str:
        """Process different types of files based on extension.

        Args:
            file_name: Path of the file to inspect.
            question: The question being answered; currently unused.

        Returns:
            A short human-readable summary of the file, or a message
            describing why it could not be processed. Never raises.
        """
        try:
            if not file_name:
                return "No file provided"

            # splitext only looks at the final path component, so dots in
            # directory names are not mistaken for an extension.
            file_ext = os.path.splitext(file_name)[1].lstrip('.').lower()

            if file_ext == 'xlsx':
                df = pd.read_excel(file_name)
                return f"Excel file loaded with {len(df)} rows"
            if file_ext == 'mp3':
                return "Audio file detected - requires speech processing"
            if file_ext == 'png':
                return "Image file detected - requires image processing"
            if file_ext == 'py':
                # Explicit encoding: the platform default is not UTF-8
                # everywhere.
                with open(file_name, 'r', encoding='utf-8') as f:
                    code = f.read()
                return f"Python code loaded: {len(code)} characters"
            return f"Unsupported file type: {file_ext or 'unknown'}"
        except Exception as e:
            return f"Error processing file: {str(e)}"

    @staticmethod
    def _parse_analysis(raw: Any, defaults: Dict[str, Any]) -> Dict[str, Any]:
        """Turn the analyzer's reply into a dict that has every default key."""
        candidates: List[str] = []
        if isinstance(raw, str):
            candidates.append(raw)
            # Models frequently wrap JSON in Markdown fences or prose; retry
            # on the outermost {...} span.
            start, end = raw.find('{'), raw.rfind('}')
            if 0 <= start < end:
                candidates.append(raw[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
            if isinstance(parsed, dict):
                # Defaults fill in any key the model left out.
                return {**defaults, **parsed}
        return dict(defaults)

    def _analyze_question(
        self, question: str, file_info: str, file_name: Optional[str]
    ) -> Dict[str, Any]:
        """Ask the LLM to classify the question; fall back to text defaults."""
        analysis_prompt = (
            "Analyze this question and determine its type and required format:\n"
            "\n"
            f"Question: {question}\n"
            f"File Info: {file_info}\n"
            "\n"
            "Provide a JSON response with:\n"
            "1. question_type: (number/text/list/date/name/multiple_choice/file_processing)\n"
            "2. required_format: (specific format requirements)\n"
            "3. key_terms: (important terms to search for)\n"
            "4. file_processing_needed: (true/false)"
        )
        analysis_messages = [
            SystemMessage(content="You are a question analyzer. Provide a JSON response."),
            HumanMessage(content=analysis_prompt)
        ]
        analysis = self.llm.invoke(analysis_messages)

        defaults = {
            "question_type": "text",
            "required_format": "direct",
            "key_terms": question,
            "file_processing_needed": bool(file_name)
        }
        return self._parse_analysis(analysis.content, defaults)

    @staticmethod
    def _format_answer(answer: str, question_type: str) -> str:
        """Normalise the raw model answer for the given question type."""
        if answer.lower().startswith("final answer:"):
            answer = answer[len("final answer:"):].strip()

        if question_type == 'number':
            # Take the first number rather than gluing every digit together,
            # so stray sentence punctuation or hyphens do not leak through.
            match = _NUMBER_RE.search(answer.replace(',', ''))
            # Keep the model's text if it contains no number at all; an
            # empty answer is never more useful.
            return match.group(0) if match else answer
        if question_type == 'list':
            items = (item.strip() for item in answer.split(','))
            return ','.join(item for item in items if item)
        # NOTE: the analysis prompt does not list the two types below, so
        # they apply only when the analyzer volunteers them.
        if question_type == 'country_code':
            return answer[:3].upper()
        if question_type == 'chess_move':
            return _NON_CHESS_RE.sub('', answer)
        return answer

    def __call__(self, question: str, file_name: Optional[str] = None) -> str:
        """Answer ``question``, optionally using the file at ``file_name``.

        Args:
            question: The question text. A question starting with ``'.'`` is
                reversed character by character before any processing.
            file_name: Optional path of an attached file; its summary from
                ``process_file`` is given to the analysis step.

        Returns:
            The post-processed answer, or ``"Error processing question:
            ..."`` if any step raises. This method never raises.
        """
        try:
            if question.startswith('.'):
                question = question[::-1]

            file_info = self.process_file(file_name, question) if file_name else ""
            analysis_data = self._analyze_question(question, file_info, file_name)

            messages = [
                SystemMessage(content=self.system_prompt),
                HumanMessage(content=(
                    f"Question Type: {analysis_data['question_type']}\n"
                    f"Required Format: {analysis_data['required_format']}\n"
                    f"Key Terms: {analysis_data['key_terms']}\n"
                    f"File Processing: {analysis_data.get('file_processing_needed', False)}\n"
                    "\n"
                    f"Question: {question}"
                ))
            ]
            response = self.llm.invoke(messages)

            # Compare case-insensitively: the analyzer may answer "Number".
            question_type = str(analysis_data['question_type']).strip().lower()
            return self._format_answer(response.content.strip(), question_type)
        except Exception as e:
            # Top-level boundary: callers get a string, never an exception.
            print(f"Error in agent response: {e}")
            return f"Error processing question: {str(e)}"