import os
import logging
import traceback
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, tool
from smolagents.models import OpenAIServerModel

# Setup logging
# Configures the root logger at import time: INFO level, "<timestamp> [LEVEL] message" lines.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Constants
# Base URL of the scoring service; "/questions" and "/submit" are appended to it when used.
SUBMISSION_URL = "https://agents-course-unit4-scoring.hf.space"
# Token used as the API key for the model endpoint. Importing this module fails fast
# with ValueError when it is missing or empty.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
if not GITHUB_TOKEN:
    raise ValueError("CRITICAL: GITHUB_TOKEN environment variable not set.")
# Base URL handed to the model client as its api_base.
GITHUB_ENDPOINT = "https://models.github.ai/inference"
# Model identifier; can be overridden through the MODEL_ID environment variable.
MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini")

@tool
def wikipedia_lookup(page_title: str) -> str:
    """
    Fetches the summary intro text of an English Wikipedia page. Use exact titles.

    Args:
        page_title (str): The exact title of the Wikipedia page (e.g., 'Albert Einstein').

    Returns:
        str: The summary extract of the page, or a message starting with 'Wikipedia Error' when the lookup fails.
    """
    # Local import keeps the tool function self-contained.
    from urllib.parse import quote

    cleaned_title = page_title.strip()
    if not cleaned_title:
        # Nothing to look up; avoid a pointless HTTP round trip.
        return "Wikipedia Error: Empty page title."

    # Percent-encode the whole title, including '/', '?', '#' and '%', so it stays a
    # single path segment. Without this, a title such as 'AC/DC' is split into two
    # path segments and anything after '?' is sent as a query string.
    page_safe = quote(cleaned_title.replace(" ", "_"), safe="")
    logger.info(f"Wikipedia lookup: '{page_title}'")
    try:
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page_safe}"
        headers = {'User-Agent': f'GAIAgent/1.2 ({os.getenv("SPACE_ID", "unknown")})'}
        r = requests.get(url, headers=headers, timeout=15)
        r.raise_for_status()
        data = r.json()

        if extract := data.get("extract", ""):
            return extract

        # No extract: tell the agent why, so it can try a different title.
        title = data.get("title", page_title)
        if data.get("type") == "disambiguation":
            return f"Wikipedia Error: '{title}' is a disambiguation page. Try a more specific title."
        return f"Wikipedia Error: Page '{title}' found but has no summary."
    except requests.exceptions.HTTPError as e:
        # Failures are returned as text (not raised) so the agent sees them as an observation.
        status_code = e.response.status_code if e.response is not None else "unknown"
        return f"Wikipedia Error: {'Page not found' if status_code == 404 else f'HTTP {status_code}'} for '{page_title}'."
    except Exception as e:
        return f"Wikipedia Error: {e}"

# Agent prompt - updated to mention only Wikipedia tool
# This text is prepended (after .strip()) to every question in run_agent_on_question().
# evaluate_and_submit() searches the agent output for the "FINAL ANSWER:" marker requested
# here, so the marker text must stay in sync with that extraction logic.
REACT_INSTRUCTION_PROMPT = """You are a helpful assistant using tools to answer questions.
Available Tools:
- wikipedia_lookup(page_title: str): Looks up a specific English Wikipedia page. Use exact titles (e.g., 'Berlin').
Follow these steps:
1. Thought: Plan which tool to use and why.
2. Action: Call the tool (e.g., wikipedia_lookup(page_title="...")).
3. Observation: Record the result.
4. Thought: Analyze result. If answered, prepare final answer. If not, plan next step.
5. Repeat Action/Observation/Thought until answered or determined impossible.
6. Thought: Summarize findings based ONLY on observations.
7. Final Answer: Provide the answer starting exactly with "FINAL ANSWER: " using the required format (number, short string, or comma-separated list).
Formatting Rules for FINAL ANSWER:
- Numbers: Just the number (e.g., `42`).
- Strings: Minimal words, no articles. Digits as words (e.g., `seven`).
- Lists: Comma-separated (e.g., `paris,london,three`).
Let's begin!
"""

# Build the model client and the agent once, at import time, so that every
# question is answered by the same instances. Any failure aborts the import.
logger.info(f"Initializing LLM and agent: {MODEL_ID}")
try:
    llm_model = OpenAIServerModel(model_id=MODEL_ID, api_key=GITHUB_TOKEN, api_base=GITHUB_ENDPOINT)
    # The Wikipedia lookup is the only tool handed to the agent.
    agent = CodeAgent(tools=[wikipedia_lookup], model=llm_model)
    logger.info("Agent initialization complete")
except Exception as init_error:
    # Log the traceback, then re-raise as RuntimeError with the original cause chained.
    logger.exception("CRITICAL: Agent initialization failed")
    raise RuntimeError(f"Agent initialization failed: {init_error}") from init_error

def run_agent_on_question(question: str) -> str:
    """Run the agent on one question and return its raw output as text.

    Args:
        question: The question text. Surrounding whitespace is ignored; ``None``
            is treated like an empty question.

    Returns:
        The agent's output converted to ``str``. When the question is empty, or
        the agent raises, a string starting with ``AGENT_ERROR:`` is returned
        instead (for a raised exception it also contains the traceback).
    """
    # Treat None like an empty question instead of failing on .strip().
    question = (question or "").strip()
    if not question:
        return "AGENT_ERROR: Empty question"

    logger.info(f"Running agent on: '{question}'")
    try:
        result = agent.run(f"{REACT_INSTRUCTION_PROMPT.strip()}\n\nQUESTION: {question}")
    except Exception as e:
        logger.exception("Agent run failed")
        return f"AGENT_ERROR: {e}\n{traceback.format_exc()}"

    # agent.run() is not guaranteed to return a str (a final answer may be e.g. a
    # number). Callers run substring checks on the result, so always hand back text.
    return str(result)

def _extract_final_answer(raw_output) -> str:
    """Return the answer to submit for one agent output (text after 'FINAL ANSWER:', else an error string)."""
    text = str(raw_output)
    if "FINAL ANSWER:" in text:
        return text.split("FINAL ANSWER:", 1)[1].strip()
    if "AGENT_ERROR:" in text:
        # Agent-side failures are submitted verbatim so they show up in the log table.
        return text
    return "AGENT_ERROR: No final answer found"


def evaluate_and_submit():
    """Fetch all questions, run the agent on each, and submit the answers.

    Reads HF_USERNAME and SPACE_ID from the environment for the submission
    payload. Network and parsing failures are caught and reported in the
    returned status text rather than raised.

    Returns:
        A ``(status_message, results_dataframe)`` tuple. The DataFrame has one
        row per processed question with the columns "Task ID", "Question",
        "Submitted Answer" and "Full Output"; it is empty when the questions
        could not be fetched or none were received.
    """
    logger.info("🚀 Starting evaluation...")
    username = os.getenv("HF_USERNAME", "unknown_user")

    # Fetch questions
    try:
        questions_response = requests.get(f"{SUBMISSION_URL}/questions", timeout=20)
        # Report HTTP failures as such instead of trying to interpret an error body.
        questions_response.raise_for_status()
        questions = questions_response.json()
        if not isinstance(questions, list):
            raise ValueError("Invalid response format")
        logger.info(f"✅ Fetched {len(questions)} questions")
    except Exception as e:
        logger.exception("Failed to fetch questions")
        return f"❌ Error fetching questions: {e}", pd.DataFrame()

    if not questions:
        return "ℹ️ No questions received", pd.DataFrame()

    # Process questions
    results_log = []
    answers_payload = []

    for i, item in enumerate(questions):
        if not isinstance(item, dict):
            # A malformed entry must not abort the whole run.
            logger.warning(f"Skipping Q{i+1}/{len(questions)}: unexpected item type {type(item).__name__}")
            continue

        task_id, question_text = item.get("task_id"), item.get("question")
        if not task_id or not question_text:
            continue

        logger.info(f"Processing Q{i+1}/{len(questions)}: ID={task_id}")
        # Coerce to text: the extraction below relies on substring checks.
        raw_output = str(run_agent_on_question(question_text))
        final_answer = _extract_final_answer(raw_output)

        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": final_answer,
            "Full Output": raw_output
        })
        answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "⚠️ No answers generated", results_df

    # Submit answers
    logger.info(f"Submitting {len(answers_payload)} answers...")
    space_id = os.getenv("SPACE_ID", "NA")
    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id != "NA" else "NA"

    try:
        submit_response = requests.post(
            f"{SUBMISSION_URL}/submit",
            json={"username": username, "agent_code": agent_code_url, "answers": answers_payload},
            timeout=90
        )
        # Without this, a 4xx/5xx reply carrying a JSON body would be reported as success.
        submit_response.raise_for_status()
        response = submit_response.json()

        score = response.get('score', 'N/A')
        score_str = f"{float(score):.2f}%" if isinstance(score, (int, float)) else str(score)
        return (f"✅ Success! Score: {score_str} "
                f"({response.get('correct_count','?')}/{response.get('total_attempted','?')}). "
                f"Msg: {response.get('message','')}"), results_df
    except Exception as e:
        logger.exception("Submission failed")
        err_msg = f"❌ Submission Failed: {e}"
        # Compare against None explicitly: a requests Response is falsy for
        # 4xx/5xx statuses, which is exactly when its body is worth showing.
        error_response = getattr(e, "response", None)
        if error_response is not None:
            err_msg += f" | Response: {error_response.text[:300]}"
        return err_msg, results_df

# Gradio interface
# Single-page UI: a header, one button, a status text box and a results table.
# Components are laid out in the order they are created inside the Blocks context.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 Agent Evaluation Runner 🚀\nEnsure `GITHUB_TOKEN` secret is set. Click Run to start.")
    run_button = gr.Button("▶️ Run Evaluation & Submit All Answers", variant="primary")
    # Read-only box that shows the status message returned by evaluate_and_submit().
    status_box = gr.Textbox(label="📊 Status", lines=4, interactive=False)
    # Headers match the keys of the per-question rows built in evaluate_and_submit().
    results_display = gr.DataFrame(
        label="📋 Detailed Log", 
        headers=["Task ID", "Question", "Submitted Answer", "Full Output"],
        wrap=True, 
        column_widths=["10%", "25%", "20%", "45%"]
    )
    # The click handler takes no inputs; its (status, DataFrame) return value is
    # mapped onto the two outputs in order.
    run_button.click(fn=evaluate_and_submit, outputs=[status_box, results_display])

# Start the web app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    logger.info("Launching Gradio application...")
    # share=False: do not request a public share link.
    demo.launch(debug=True, share=False)