import os
import requests
from dotenv import load_dotenv
from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel

# Load variables from a local .env file (python-dotenv) into the process
# environment, so HF_USERNAME (read in run_gaia_evaluation) can be set there.
load_dotenv()

# Base URL of the scoring service; "/questions" and "/submit" are appended to
# it when fetching the tasks and posting the answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Prompt passed to the agent through its `instructions` argument. It spells
# out the answer format because, as the prompt itself says, evaluation uses
# exact matching. This text is sent to the model at runtime: any edit changes
# agent behavior, so treat it as code rather than as documentation.
INSTRUCTIONS = """You are a general AI assistant. I will ask you a question. Report your thoughts, and then provide your final answer.

CRITICAL FORMATTING RULES:
- Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string
- Be extremely precise with spelling and formatting - the evaluation uses exact matching
- For strings: no extra spaces, no punctuation unless part of the answer, lowercase
- For numbers: just the number, no units, no commas, no currency symbols
- Provide ONLY the answer as your final response, nothing else
- Expand abbreviations like 'St.' to 'Saint' in city names

You have access to a web search tool to help you find accurate information. Use it when you need to look up facts."""

def _extract_answer(result):
    """Reduce a raw agent result to the one-line answer that gets submitted.

    Args:
        result: Whatever ``agent.run`` returned; it is stringified first.

    Returns:
        The last line of the stringified result with surrounding whitespace
        and any trailing periods removed, or an ``AGENT ERROR: ...`` marker
        when the result is empty or its last line starts with ``{``.
    """
    text = str(result).strip()
    # The agent is instructed to finish with the bare answer, so only the
    # last line is kept.
    answer = text.splitlines()[-1] if text else "AGENT ERROR: No response."
    # Heuristic: a last line opening with "{" is not accepted as an answer
    # (presumably a dumped dict/JSON rather than plain text).
    if answer.startswith("{"):
        answer = "AGENT ERROR: No final answer."
    return answer.strip().rstrip(".")


def run_gaia_evaluation():
    """Answer every question from the scoring service and submit the results.

    Workflow:
        1. Read ``HF_USERNAME`` from the environment; abort if it is unset.
        2. Build a ``CodeAgent`` with a DuckDuckGo search tool on top of the
           ``ollama_chat/gemma3`` model served at ``http://localhost:11434``.
        3. GET ``{DEFAULT_API_URL}/questions``.
        4. Run the agent on each question and collect one answer per task.
        5. POST the answers to ``{DEFAULT_API_URL}/submit`` and print the
           score summary from the response.

    A failure while answering a single question is recorded as an
    ``AGENT ERROR: ...`` answer and the run continues, so one bad question
    does not discard the answers already collected. Network/decoding errors
    while fetching or submitting are printed rather than raised.

    Returns:
        None. All progress and results are reported on stdout.
    """
    print("🚀 GAIA Benchmark Evaluation with Ollama")
    print("=" * 60)

    username = os.getenv("HF_USERNAME")
    if not username:
        print("❌ Please set HF_USERNAME environment variable")
        return
    print(f"👤 User: {username}")

    model = LiteLLMModel(
        model_id="ollama_chat/gemma3",
        api_base="http://localhost:11434",
        num_ctx=8192,
        temperature=0.1,  # Low temperature for more deterministic answers
    )

    agent = CodeAgent(
        tools=[DuckDuckGoSearchTool()],
        model=model,
        instructions=INSTRUCTIONS,
        max_steps=10,
    )

    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        resp.raise_for_status()
        data = resp.json()
    # ValueError covers JSON decode failures on requests versions whose
    # decode error is not a RequestException subclass.
    except (requests.RequestException, ValueError) as e:
        print(f"❌ Error fetching questions: {e}")
        return

    # The endpoint may return a bare list or an object wrapping it under
    # "questions"; any other shape is treated as "no questions".
    if isinstance(data, list):
        questions = data
    elif isinstance(data, dict):
        questions = data.get("questions") or []
    else:
        questions = []
    print(f"📋 Loaded {len(questions)} questions")
    if not questions:
        # Nothing to answer: do not post an empty submission.
        print("❌ No questions received; nothing to submit")
        return

    results = []
    for i, q in enumerate(questions, start=1):
        task_id = q.get("task_id") if isinstance(q, dict) else None
        text = q.get("question") if isinstance(q, dict) else None
        if task_id is None or text is None:
            # Without both fields there is nothing to ask or to key the
            # answer on, so the entry is skipped instead of crashing the run.
            print(f"\n❌ Skipping malformed question {i}: {q!r}")
            continue
        print(f"\n❓ Question {i}: {text}")

        try:
            result = agent.run(text, reset=True)
        except Exception as e:  # per-question boundary: keep earlier answers
            print(f"❌ Agent failed on task {task_id}: {e}")
            results.append({
                "task_id": task_id,
                "submitted_answer": f"AGENT ERROR: {type(e).__name__}",
            })
            continue

        result_str = str(result).strip()
        out = _extract_answer(result)
        results.append({"task_id": task_id, "submitted_answer": out})
        print(f"✅ Answer: '{out}'")
        print(f"📝 Preview: {result_str[:200]}...")

    # Submit answers automatically
    payload = {
        "username": username,
        "agent_code": "ollama-gemma3-with-tools",
        "answers": results,
    }
    try:
        post = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=60)
        post.raise_for_status()
        res = post.json()
        print("\n" + "=" * 60)
        print("🏆 GAIA BENCHMARK RESULTS")
        print("=" * 60)
        print(f"👤 User: {res.get('username', username)}")
        # The response field names are probed under two spellings each.
        print(f"📊 Overall Score: {res.get('score', res.get('overall_score', 'N/A'))}%")
        print(f"✅ Correct: {res.get('correct_count', res.get('num_correct', 'N/A'))}/{len(results)}")
        print(f"💬 Message: {res.get('message', 'N/A')}")
        print("=" * 60)
    except (requests.RequestException, ValueError) as e:
        print(f"❌ Error submitting: {e}")
        # Without a server-side score, report how many answers were produced
        # without an agent error.
        done = sum(1 for r in results if not r["submitted_answer"].startswith("AGENT ERROR"))
        print(f"Completed locally: {done}/{len(results)}")

# Run the evaluation only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    run_gaia_evaluation()