import os
from typing import Dict, List, Optional
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
import json
import requests
from bs4 import BeautifulSoup
import urllib.parse
import pandas as pd
import re

# Load variables from a local .env file into the process environment before
# the agent is constructed (presumably this is where the Groq API key comes
# from - confirm against deployment configuration).
load_dotenv()

class BasicAgent:
    """Question-answering agent backed by a Groq-hosted chat model.

    Calling the instance runs two LLM round trips: the first classifies the
    question (type, required format, key terms), the second produces the
    answer, which is then normalised according to the detected question type.

    The search helpers (`wiki_search`, `web_search`, `arxiv_search`) are
    collected in `self.tools`, but `__call__` does not invoke them itself.
    """

    # Upper bound, in seconds, for the DuckDuckGo HTTP request so a stalled
    # connection cannot block the agent indefinitely.
    _HTTP_TIMEOUT_SECONDS = 10

    # First plain decimal number in a string (ASCII digits only).
    _NUMBER_RE = re.compile(r'-?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)')

    # A comma used as a thousands separator, e.g. the commas in "1,234,567".
    _THOUSANDS_SEP_RE = re.compile(r'(?<=[0-9]),(?=[0-9]{3}(?![0-9]))')

    # One move in standard algebraic notation: castling, or an optional piece
    # letter, optional disambiguation, optional capture, target square,
    # optional promotion, optional check/mate marker.
    _SAN_MOVE_RE = re.compile(
        r'(?:[O0]-[O0](?:-[O0])?'
        r'|[KQRBN]?[a-h]?[1-8]?x?[a-h][1-8](?:=[QRBN])?)[+#]?'
    )

    def __init__(self):
        """Create the chat model client, the system prompt and the tool list."""
        self.llm = ChatGroq(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            temperature=0.1
        )

        self.system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.

Key Rules:
1. Answer Format:
   - For numbers: Provide only the number without units, commas, or formatting
   - For text: Use minimal words, no articles or abbreviations
   - For lists: Use comma-separated values without additional formatting
   - For dates: Use YYYY-MM-DD format unless specified otherwise
   - For names: Use full names without titles or honorifics
   - For country codes: Use official IOC codes (3 letters)
   - For chess moves: Use standard algebraic notation
   - For currency: Use numbers only, no symbols

2. Answer Guidelines:
   - Be extremely precise and direct
   - Do not include any explanatory text
   - Do not use phrases like "FINAL ANSWER" or any markers
   - Do not include units unless explicitly requested
   - Do not use abbreviations unless they are standard (e.g., DNA, RNA)
   - For multiple choice: Provide only the letter or number of the correct answer
   - For reversed text: Provide the answer in normal text
   - For file-based questions: Focus on the specific information requested

3. Error Handling:
   - If uncertain, provide the most likely answer based on available information
   - If completely unsure, provide a reasonable default rather than an error message
   - For file processing errors, indicate the specific issue

4. Special Cases:
   - For mathematical questions: Provide the exact numerical result
   - For historical dates: Use the most widely accepted date
   - For scientific terms: Use the standard scientific notation
   - For geographical locations: Use official names without abbreviations
   - For audio/video questions: Focus on the specific detail requested"""

        # Search helpers exposed as a list; not called from __call__.
        self.tools = [
            self.wiki_search,
            self.web_search,
            self.arxiv_search
        ]

    def wiki_search(self, query: str) -> str:
        """Return the text of up to two Wikipedia pages matching *query*.

        Failures are reported as an "Error searching Wikipedia: ..." string
        instead of being raised.
        """
        try:
            search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
            return "\n".join([doc.page_content for doc in search_docs])
        except Exception as e:
            return f"Error searching Wikipedia: {str(e)}"

    def web_search(self, query: str) -> str:
        """Return up to three DuckDuckGo results (title and snippet) for *query*.

        Scrapes the HTML endpoint of DuckDuckGo. Returns "No results found"
        when nothing could be parsed and an "Error searching web: ..." string
        on any failure, including a request timeout.
        """
        try:
            encoded_query = urllib.parse.quote(query)
            url = f"https://html.duckduckgo.com/html/?q={encoded_query}"

            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }

            # Without a timeout requests waits forever on a stalled server.
            response = requests.get(
                url, headers=headers, timeout=self._HTTP_TIMEOUT_SECONDS
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            results = []
            for result in soup.find_all('div', class_='result__body'):
                title = result.find('h2', class_='result__title')
                snippet = result.find('a', class_='result__snippet')

                if title and snippet:
                    results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")

                if len(results) >= 3:
                    break

            return "\n\n".join(results) if results else "No results found"

        except Exception as e:
            return f"Error searching web: {str(e)}"

    def arxiv_search(self, query: str) -> str:
        """Return the first 1000 characters of up to two Arxiv papers.

        Failures are reported as an "Error searching Arxiv: ..." string
        instead of being raised.
        """
        try:
            search_docs = ArxivLoader(query=query, load_max_docs=2).load()
            return "\n".join([doc.page_content[:1000] for doc in search_docs])
        except Exception as e:
            return f"Error searching Arxiv: {str(e)}"

    def process_file(self, file_name: str, question: str) -> str:
        """Describe an attached file based on its extension.

        Args:
            file_name: Path of the file; an empty value yields
                "No file provided".
            question: The question the file belongs to. Currently unused;
                kept so existing callers keep working.

        Returns:
            A short status string: row count for .xlsx, character count for
            .py, a fixed notice for .mp3 / .png, an "Unsupported file type"
            message otherwise, or "Error processing file: ..." on failure.
        """
        try:
            if not file_name:
                return "No file provided"

            # splitext only looks at the final path component, so a dotted
            # directory or an extension-less name is not mistaken for an
            # extension.
            file_ext = os.path.splitext(file_name)[1].lstrip('.').lower()

            if file_ext == 'xlsx':
                df = pd.read_excel(file_name)
                return f"Excel file loaded with {len(df)} rows"

            elif file_ext == 'mp3':
                return "Audio file detected - requires speech processing"

            elif file_ext == 'png':
                return "Image file detected - requires image processing"

            elif file_ext == 'py':
                # Explicit encoding: the platform default is not always UTF-8.
                with open(file_name, 'r', encoding='utf-8') as f:
                    code = f.read()
                return f"Python code loaded: {len(code)} characters"

            else:
                return f"Unsupported file type: {file_ext or 'unknown'}"

        except Exception as e:
            return f"Error processing file: {str(e)}"

    @staticmethod
    def _parse_analysis(raw: object, question: str, has_file: bool) -> Dict[str, object]:
        """Turn the analyzer's reply into a dict with all expected keys.

        Tolerates Markdown code fences or prose around the JSON object, and
        fills any missing key from defaults so later lookups cannot fail.
        Returns the defaults alone when no JSON object can be decoded.
        """
        defaults = {
            "question_type": "text",
            "required_format": "direct",
            "key_terms": question,
            "file_processing_needed": has_file,
        }

        text = raw.strip() if isinstance(raw, str) else ""
        # Keep only the outermost {...} span, dropping fences or commentary.
        start, end = text.find('{'), text.rfind('}')
        if start != -1 and end > start:
            text = text[start:end + 1]

        try:
            parsed = json.loads(text)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return defaults

        if not isinstance(parsed, dict):
            return defaults
        return {**defaults, **parsed}

    @classmethod
    def _format_answer(cls, answer: str, question_type: object) -> str:
        """Normalise *answer* according to the detected *question_type*.

        - number: first decimal number in the text, thousands commas removed;
          the answer is left untouched when it contains no number.
        - list: comma-separated items with surrounding whitespace stripped.
        - country_code: first three characters, upper-cased.
        - chess_move: first move in algebraic notation, piece letters and
          castling included.
        Any other type is returned unchanged.
        """
        kind = str(question_type).strip().lower()

        if kind == 'number':
            match = cls._NUMBER_RE.search(cls._THOUSANDS_SEP_RE.sub('', answer))
            return match.group(0) if match else answer

        if kind == 'list':
            return ','.join(item.strip() for item in answer.split(','))

        if kind == 'country_code':
            return answer[:3].upper()

        if kind == 'chess_move':
            match = cls._SAN_MOVE_RE.search(answer)
            if match:
                return match.group(0)
            # No recognisable move: fall back to dropping every character
            # that cannot appear in algebraic notation.
            return re.sub(r'[^a-hKQRBNO1-8x+=#\-]', '', answer)

        return answer

    def __call__(self, question: str, file_name: Optional[str] = None) -> str:
        """Answer *question*, optionally taking an attached file into account.

        Args:
            question: The question text. A question starting with "." is
                treated as reversed text and flipped before processing.
            file_name: Optional path of an attached file.

        Returns:
            The normalised answer, or "Error processing question: ..." if
            anything fails along the way.
        """
        try:
            if question.startswith('.'):
                question = question[::-1]

            file_info = ""
            if file_name:
                file_info = self.process_file(file_name, question)

            # The type list includes country_code and chess_move so that the
            # matching formatting rules in _format_answer can be selected.
            analysis_prompt = f"""Analyze this question and determine its type and required format:
            Question: {question}
            File Info: {file_info}
            Provide a JSON response with:
            1. question_type: (number/text/list/date/name/multiple_choice/file_processing/country_code/chess_move)
            2. required_format: (specific format requirements)
            3. key_terms: (important terms to search for)
            4. file_processing_needed: (true/false)"""

            analysis_messages = [
                SystemMessage(content="You are a question analyzer. Provide a JSON response."),
                HumanMessage(content=analysis_prompt)
            ]

            analysis = self.llm.invoke(analysis_messages)
            analysis_data = self._parse_analysis(
                analysis.content, question, bool(file_name)
            )

            # File Info is passed on so the answering call sees what was
            # learned about the attachment, not only the analysis call.
            messages = [
                SystemMessage(content=self.system_prompt),
                HumanMessage(content=f"""Question Type: {analysis_data['question_type']}
                Required Format: {analysis_data['required_format']}
                Key Terms: {analysis_data['key_terms']}
                File Processing: {analysis_data.get('file_processing_needed', False)}
                File Info: {file_info}
                
                Question: {question}""")
            ]

            response = self.llm.invoke(messages)

            answer = response.content.strip()

            # The model sometimes adds this marker despite the system prompt.
            if answer.lower().startswith("final answer:"):
                answer = answer[len("final answer:"):].strip()

            return self._format_answer(answer, analysis_data['question_type'])

        except Exception as e:
            # Top-level boundary: callers always get a string back.
            print(f"Error in agent response: {e}")
            return f"Error processing question: {str(e)}"