Spaces:
Sleeping
Sleeping
| import os | |
| from typing import Dict, List, Optional | |
| from dotenv import load_dotenv | |
| from langchain_groq import ChatGroq | |
| from langchain_core.messages import SystemMessage, HumanMessage | |
| from langchain_community.document_loaders import WikipediaLoader | |
| from langchain_community.document_loaders import ArxivLoader | |
| import json | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import urllib.parse | |
| import pandas as pd | |
| import re | |
| load_dotenv() | |
class BasicAgent:
    """Question-answering agent built on a Groq-hosted chat model.

    Calling the agent runs two LLM passes: the first classifies the question
    (type, required format, key terms), the second answers it under
    ``system_prompt``.  The raw answer is then normalized according to the
    detected question type.

    The search helpers are collected in ``self.tools`` but ``__call__`` does
    not invoke them; they are available for callers to use directly.
    """

    # Seconds to wait for the DuckDuckGo HTTP request before giving up.
    # Without a timeout, ``requests`` can block indefinitely.
    REQUEST_TIMEOUT = 10
    # Maximum number of web results returned by ``web_search``.
    MAX_WEB_RESULTS = 3

    # First ASCII number in a text: optional sign, integer or decimal part,
    # optional exponent.
    _NUMBER_RE = re.compile(r"-?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)(?:[eE][-+]?[0-9]+)?")
    # A thousands separator: a comma between a digit and exactly three digits.
    _THOUSANDS_RE = re.compile(r"(?<=[0-9]),(?=[0-9]{3}(?![0-9]))")
    # One move in standard algebraic notation (castling, piece/pawn moves,
    # captures, promotion, check/mate suffix), not embedded in a longer word.
    _CHESS_RE = re.compile(
        r"(?<![A-Za-z0-9])"
        r"(?:O-O(?:-O)?|[KQRBN]?[a-h]?[1-8]?x?[a-h][1-8](?:=[QRBN])?)"
        r"[+#]?"
        r"(?![A-Za-z0-9])"
    )

    def __init__(self):
        """Create the chat model client, the system prompt and the tool list."""
        self.llm = ChatGroq(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            temperature=0.1
        )
        self.system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.
Key Rules:
1. Answer Format:
- For numbers: Provide only the number without units, commas, or formatting
- For text: Use minimal words, no articles or abbreviations
- For lists: Use comma-separated values without additional formatting
- For dates: Use YYYY-MM-DD format unless specified otherwise
- For names: Use full names without titles or honorifics
- For country codes: Use official IOC codes (3 letters)
- For chess moves: Use standard algebraic notation
- For currency: Use numbers only, no symbols
2. Answer Guidelines:
- Be extremely precise and direct
- Do not include any explanatory text
- Do not use phrases like "FINAL ANSWER" or any markers
- Do not include units unless explicitly requested
- Do not use abbreviations unless they are standard (e.g., DNA, RNA)
- For multiple choice: Provide only the letter or number of the correct answer
- For reversed text: Provide the answer in normal text
- For file-based questions: Focus on the specific information requested
3. Error Handling:
- If uncertain, provide the most likely answer based on available information
- If completely unsure, provide a reasonable default rather than an error message
- For file processing errors, indicate the specific issue
4. Special Cases:
- For mathematical questions: Provide the exact numerical result
- For historical dates: Use the most widely accepted date
- For scientific terms: Use the standard scientific notation
- For geographical locations: Use official names without abbreviations
- For audio/video questions: Focus on the specific detail requested"""
        # Initialize tools
        self.tools = [
            self.wiki_search,
            self.web_search,
            self.arxiv_search
        ]

    def wiki_search(self, query: str) -> str:
        """Return the text of up to two Wikipedia pages matching ``query``.

        Any failure is reported as an ``"Error searching Wikipedia: ..."``
        string instead of being raised.
        """
        try:
            search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
            return "\n".join(doc.page_content for doc in search_docs)
        except Exception as e:
            return f"Error searching Wikipedia: {str(e)}"

    def web_search(self, query: str) -> str:
        """Search DuckDuckGo's HTML endpoint and return the top results.

        Returns up to ``MAX_WEB_RESULTS`` entries formatted as
        ``"Title: ...\\nSnippet: ..."`` separated by blank lines,
        ``"No results found"`` when nothing matched, or an
        ``"Error searching web: ..."`` string on failure (including timeout).
        """
        try:
            encoded_query = urllib.parse.quote(query)
            url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            # A timeout keeps a stalled connection from hanging the agent.
            response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            results = []
            for result in soup.find_all('div', class_='result__body'):
                title = result.find('h2', class_='result__title')
                snippet = result.find('a', class_='result__snippet')
                if title and snippet:
                    results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")
                if len(results) >= self.MAX_WEB_RESULTS:
                    break
            return "\n\n".join(results) if results else "No results found"
        except Exception as e:
            return f"Error searching web: {str(e)}"

    def arxiv_search(self, query: str) -> str:
        """Return the first 1000 characters of up to two Arxiv papers.

        Any failure is reported as an ``"Error searching Arxiv: ..."`` string
        instead of being raised.
        """
        try:
            search_docs = ArxivLoader(query=query, load_max_docs=2).load()
            return "\n".join(doc.page_content[:1000] for doc in search_docs)
        except Exception as e:
            return f"Error searching Arxiv: {str(e)}"

    def process_file(self, file_name: str, question: str) -> str:
        """Summarize an attached file based on its extension.

        Args:
            file_name: Path of the attachment; an empty value yields
                ``"No file provided"``.
            question: The question being answered (currently unused).

        Returns:
            A short status string: row count for ``xlsx``, character count
            for ``py``, a placeholder for ``mp3``/``png``, an "unsupported"
            message for anything else, or ``"Error processing file: ..."``
            if reading fails.
        """
        try:
            if not file_name:
                return "No file provided"
            file_ext = file_name.rsplit('.', 1)[-1].lower()
            if file_ext == 'xlsx':
                df = pd.read_excel(file_name)
                return f"Excel file loaded with {len(df)} rows"
            if file_ext == 'mp3':
                return "Audio file detected - requires speech processing"
            if file_ext == 'png':
                return "Image file detected - requires image processing"
            if file_ext == 'py':
                # Explicit encoding: the platform default is not always UTF-8.
                with open(file_name, 'r', encoding='utf-8') as f:
                    code = f.read()
                return f"Python code loaded: {len(code)} characters"
            return f"Unsupported file type: {file_ext}"
        except Exception as e:
            return f"Error processing file: {str(e)}"

    @staticmethod
    def _parse_analysis(raw, question: str, file_name: Optional[str] = None) -> Dict[str, object]:
        """Parse the analyzer's reply into a dict that always has every key.

        The reply may wrap its JSON object in Markdown fences or prose, so
        the outermost ``{...}`` span is extracted before parsing.  Missing
        or null keys, invalid JSON, and non-object replies fall back to
        defaults derived from ``question`` and ``file_name``.
        """
        analysis: Dict[str, object] = {
            "question_type": "text",
            "required_format": "direct",
            "key_terms": question,
            "file_processing_needed": bool(file_name),
        }
        if not isinstance(raw, str):
            return analysis
        start = raw.find("{")
        end = raw.rfind("}")
        if start == -1 or end <= start:
            return analysis
        try:
            parsed = json.loads(raw[start:end + 1])
        except ValueError:  # json.JSONDecodeError is a ValueError
            return analysis
        if isinstance(parsed, dict):
            analysis.update(
                {key: value for key, value in parsed.items() if value is not None}
            )
        return analysis

    @classmethod
    def _format_answer(cls, answer: str, question_type) -> str:
        """Normalize ``answer`` according to the detected ``question_type``.

        * ``number``: the first number in the text, thousands separators
          removed; the answer is returned unchanged if it holds no number.
        * ``list``: items re-joined with bare commas.
        * ``country_code``: first three characters, upper-cased.
        * ``chess_move``: the first algebraic-notation move found; otherwise
          characters that cannot appear in such a move are dropped.
        * anything else: returned unchanged.
        """
        kind = str(question_type).strip().lower()
        if kind == 'number':
            match = cls._NUMBER_RE.search(cls._THOUSANDS_RE.sub("", answer))
            return match.group(0) if match else answer
        if kind == 'list':
            return ','.join(item.strip() for item in answer.split(','))
        if kind == 'country_code':
            return answer[:3].upper()
        if kind == 'chess_move':
            match = cls._CHESS_RE.search(answer)
            if match:
                return match.group(0)
            # Keep piece letters and castling characters; dropping them
            # would turn e.g. "Nf3" into the different move "f3".
            return re.sub(r'[^a-hKQRBNO1-8x+=#-]', '', answer)
        return answer

    def __call__(self, question: str, file_name: Optional[str] = None) -> str:
        """Answer ``question``, optionally taking an attached file into account.

        A question starting with ``'.'`` is treated as reversed text and is
        flipped before processing.  Any exception is printed and returned as
        an ``"Error processing question: ..."`` string rather than raised.
        """
        try:
            if question.startswith('.'):
                question = question[::-1]
            file_info = self.process_file(file_name, question) if file_name else ""

            # The type list includes country_code and chess_move so that the
            # matching normalization branches can actually be selected.
            analysis_prompt = (
                "Analyze this question and determine its type and required format:\n"
                f"Question: {question}\n"
                f"File Info: {file_info}\n"
                "Provide a JSON response with:\n"
                "1. question_type: (number/text/list/date/name/multiple_choice/"
                "country_code/chess_move/file_processing)\n"
                "2. required_format: (specific format requirements)\n"
                "3. key_terms: (important terms to search for)\n"
                "4. file_processing_needed: (true/false)"
            )
            analysis = self.llm.invoke([
                SystemMessage(content="You are a question analyzer. Provide a JSON response."),
                HumanMessage(content=analysis_prompt)
            ])
            analysis_data = self._parse_analysis(analysis.content, question, file_name)

            answer_prompt = (
                f"Question Type: {analysis_data['question_type']}\n"
                f"Required Format: {analysis_data['required_format']}\n"
                f"Key Terms: {analysis_data['key_terms']}\n"
                f"File Processing: {analysis_data.get('file_processing_needed', False)}\n"
                f"Question: {question}"
            )
            response = self.llm.invoke([
                SystemMessage(content=self.system_prompt),
                HumanMessage(content=answer_prompt)
            ])
            answer = response.content.strip()
            if answer.lower().startswith("final answer:"):
                answer = answer[len("final answer:"):].strip()
            return self._format_answer(answer, analysis_data['question_type'])
        except Exception as e:
            print(f"Error in agent response: {e}")
            return f"Error processing question: {str(e)}"