"""Gradio app that runs the Antientropy LangGraph agent over the GAIA
benchmark questions and submits the answers to the course scoring API."""

import concurrent.futures
import json
import os
import re
import time

import gradio as gr
import pandas as pd
import requests
from gradio import OAuthProfile
from langchain_core.messages import HumanMessage

from agent import build_graph

# --- CONFIGURATION ---
print("DEBUG: Loading Antientropy Agent v5.8 (LangGraph + File Tools) code...")
SPACE_URL = "https://huggingface.co/spaces/jomasego/Antientropy/tree/main"
API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- THE ANTIENTROPY AGENT SETUP ---
# System prompt is now handled in agent.py via system_prompt.txt
# We will still prepend instructions to the user query just in case

# Initialize the Agent
graph = build_graph()


# --- HELPER FUNCTIONS ---
def get_questions():
    """Retrieve the full list of evaluation questions.

    Returns:
        list: Parsed JSON list of task dicts, or [] on any non-200 response.
    """
    response = requests.get(f"{API_URL}/questions", timeout=30)
    if response.status_code == 200:
        return response.json()
    return []


def download_file(task_id):
    """Download the file associated with a task, if it exists.

    Args:
        task_id: Identifier of the GAIA task whose attachment to fetch.

    Returns:
        str | None: Path of the downloaded file, or None when the task has
        no attachment (non-200 response).
    """
    response = requests.get(f"{API_URL}/files/{task_id}", timeout=30)
    if response.status_code == 200:
        # Use absolute path in /tmp for reliability
        data_dir = "/tmp/gaia_files"
        os.makedirs(data_dir, exist_ok=True)
        content_disp = response.headers.get("Content-Disposition")
        filename = f"{data_dir}/{task_id}_file"
        if content_disp and "filename=" in content_disp:
            raw_filename = content_disp.split('filename=')[1].strip().strip('"')
            # basename() guards against path traversal via a hostile
            # Content-Disposition header (e.g. "../../etc/passwd").
            filename = os.path.join(data_dir, os.path.basename(raw_filename))
        with open(filename, "wb") as f:
            f.write(response.content)
        # BUG FIX: this message previously printed a literal placeholder
        # instead of the actual download path.
        print(f"šŸ“ Downloaded file to: {filename} ({len(response.content)} bytes)")
        return filename
    return None


def clean_answer(raw_response):
    """Strip everything except the final answer.

    Looks for a "FINAL ANSWER:" marker (any casing). When the model did not
    follow the output format, falls back to the last non-empty line so the
    evaluation still receives something.
    """
    if not raw_response:
        return ""
    clean = str(raw_response)
    # 1. Look for explicit "FINAL ANSWER:" marker (case insensitive)
    match = re.search(r'FINAL\s*ANSWER\s*:\s*(.+)', clean, re.IGNORECASE | re.DOTALL)
    if match:
        answer = match.group(1).strip()
        # If multiline, take just the first line (the actual answer).
        if '\n' in answer:
            answer = answer.split('\n')[0].strip()
        # Remove trailing punctuation that might be added.
        # BUG FIX: rstrip('.') now runs AFTER the first line is isolated;
        # previously it ran on the whole multiline tail, so the returned
        # first line could still end with a period.
        return answer.rstrip('.')
    # 2. If no marker, the model didn't follow instructions - return the
    # last non-empty line as a fallback. This signals an error to the
    # evaluation.
    lines = [l.strip() for l in clean.strip().split('\n') if l.strip()]
    if lines:
        return lines[-1]
    return clean.strip()


def run_agent_on_task(prompt):
    """Run the graph agent on a single prompt and return its final text."""
    messages = [HumanMessage(content=prompt)]
    result = graph.invoke({"messages": messages})
    # The last message is the AI's final response
    return result["messages"][-1].content


def _file_instruction(file_path):
    """Return the tool-usage hint to append to the prompt for *file_path*,
    chosen by the file's extension."""
    ext = file_path.split('.')[-1].lower() if '.' in file_path else ''
    if ext in ['mp3', 'wav', 'ogg', 'flac', 'm4a']:
        return f"\n\nIMPORTANT: An audio file has been downloaded to: '{file_path}'. You MUST use the transcribe_audio tool with this exact path to get the content."
    if ext in ['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp']:
        return f"\n\nIMPORTANT: An image file has been downloaded to: '{file_path}'. You MUST use extract_text_from_image or analyze_image tool with this exact path."
    if ext == 'pdf':
        return f"\n\nIMPORTANT: A PDF file has been downloaded to: '{file_path}'. You MUST use the read_pdf_file tool with this exact path."
    if ext in ['csv']:
        return f"\n\nIMPORTANT: A CSV file has been downloaded to: '{file_path}'. You MUST use analyze_csv_file or execute_code_multilang to read it."
    if ext in ['xlsx', 'xls']:
        return f"\n\nIMPORTANT: An Excel file has been downloaded to: '{file_path}'. You MUST use analyze_excel_file or execute_code_multilang to read it."
    if ext in ['py', 'txt', 'json', 'xml', 'html', 'css', 'js', 'md']:
        return f"\n\nIMPORTANT: A text/code file has been downloaded to: '{file_path}'. You MUST use read_file_content to read it first."
    return f"\n\nIMPORTANT: A file has been downloaded to: '{file_path}'. You MUST use read_file_content or execute_code_multilang to read it."


def run_evaluation(profile: gr.OAuthProfile | None):
    """Main function to run the agent and submit results.

    Args:
        profile: OAuth profile injected by gr.LoginButton, or None when the
            user is not logged in.

    Returns:
        tuple[str, pd.DataFrame]: Human-readable run log, and a table of
        (task id, question preview, submitted answer) rows.
    """
    try:
        if profile is None:
            return "āŒ Error: You must be logged in to submit. Please log in with Hugging Face.", pd.DataFrame()
        username = profile.username
    except Exception as e:
        return f"āŒ Error retrieving user profile: {e}", pd.DataFrame()

    output = f"šŸš€ Antientropy Agent (LangGraph Edition) Initiated for user {username}...\n\n"

    # Check token again just in case
    if not os.environ.get("HF_TOKEN"):
        return (
            "āŒ Missing HF token for model access. "
            "Add a Space secret named HF_TOKEN with read access.",
            pd.DataFrame(),
        )

    # 1. Fetch Questions
    questions = get_questions()
    output += f"šŸ“„ Fetched {len(questions)} tasks from the API.\n\n"

    submission_results = []
    questions_and_answers = []

    # 2. Solve Each Question
    per_task_timeout_sec = 180  # Increased timeout for LangGraph to 3 minutes
    for i, task in enumerate(questions, 1):
        task_id = task.get("id") or task.get("task_id")
        question_text = task.get("question")
        if not task_id or question_text is None:
            output += f"āš ļø Skipping malformed task: {task}\n\n"
            continue

        output += f"--- Solving Task {i}/{len(questions)} (ID: {task_id}) ---\n"

        # Add delay to avoid rate limits
        time.sleep(5)

        file_path = download_file(task_id)

        # Construct prompt with specific guidance based on file type
        prompt = f"Question: {question_text}"
        if file_path:
            prompt += _file_instruction(file_path)

        # BUG FIX: the executor is no longer used as a context manager.
        # `with ThreadPoolExecutor(...)` blocks in __exit__ (shutdown(wait=True))
        # until the worker thread finishes, so a hung agent call nullified the
        # per-task timeout. shutdown(wait=False, cancel_futures=True) lets the
        # loop move on immediately.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(run_agent_on_task, prompt)
            response = future.result(timeout=per_task_timeout_sec)
            final_answer = clean_answer(response)
            output += f"āœ… Agent Answer: {final_answer}\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": final_answer
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": final_answer
            })
        except concurrent.futures.TimeoutError:
            output += f"ā³ Timeout on task {task_id} after {per_task_timeout_sec}s\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": "Timeout"
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": "Timeout"
            })
        except Exception as e:
            error_msg = str(e)
            output += f"āŒ Error on task {task_id}: {error_msg}\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": "Error"
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": f"Error: {error_msg}"
            })
        finally:
            executor.shutdown(wait=False, cancel_futures=True)

    # 3. Submit to Leaderboard
    output += "\nšŸ“¤ Submitting results to Leaderboard...\n"
    payload = {
        "username": username,
        "agent_code": SPACE_URL,
        "answers": submission_results
    }
    try:
        # BUG FIX: added a timeout (the GET calls have one) so a stalled
        # scoring API cannot hang the app indefinitely.
        submit_response = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
        if submit_response.status_code == 200:
            result = submit_response.json()
            output += "šŸŽ‰ SUCCESS! Submission received.\n"
            output += json.dumps(result, indent=2)
        else:
            output += f"āš ļø Submission failed: {submit_response.text}"
    except Exception as e:
        output += f"āš ļø Submission failed with error: {e}"

    return output, pd.DataFrame(questions_and_answers)


# --- GRADIO INTERFACE ---
with gr.Blocks(title="Antientropy Final Assignment v5") as demo:
    gr.Markdown("# šŸ•µšŸ»ā€ā™‚ļø Antientropy Agent - GAIA Benchmark v5 (LangGraph + Multimedia)")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to run the agent.
        """
    )
    gr.LoginButton()
    submit_btn = gr.Button("Run Evaluation & Submit All Answers", variant="primary", size="lg")
    with gr.Row():
        status_output = gr.Textbox(label="Run Status / Submission Result", lines=15, max_lines=30)
    with gr.Row():
        results_table = gr.Dataframe(
            headers=["Task ID", "Question", "Submitted Answer"],
            label="Questions and Agent Answers"
        )
    # With inputs=None, Gradio injects the OAuthProfile from the type
    # annotation on run_evaluation's `profile` parameter.
    submit_btn.click(
        fn=run_evaluation,
        inputs=None,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    demo.launch()