"""Gradio Space that runs a smolagents CodeAgent on the GAIA Level 1
benchmark questions and submits the answers to the course scoring service."""

import os
import traceback

import gradio as gr
import pandas as pd
import requests
from smolagents import CodeAgent, InferenceClientModel, OpenAIModel

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

GAIA_SYSTEM_PROMPT = """You are solving GAIA level 1 questions with extreme precision.

CRITICAL RULES:
1. Return ONLY the final answer - no explanations, no context, no preamble
2. For numbers: just the number (no units unless explicitly requested)
3. For strings: just the answer (no articles like "the" or "a")
4. For lists: format as "item1, item2, item3" (no quotes, no brackets)

STRATEGY:
- Use web search liberally - search multiple times with different keywords
- Visit actual webpages to get complete information
- Cross-reference multiple sources
- Think step-by-step but output only the final answer
- If you find relevant info but not the complete answer, search again with more specific terms

NEVER output:
- "FINAL ANSWER:"
- "The answer is:"
- Explanations or reasoning
- "No information found" (keep searching!)

Examples of correct outputs:
Question: "How many studio albums?" → Answer: "7"
Question: "What is the capital?" → Answer: "Paris"
Question: "List the winners" → Answer: "John, Mary, Bob"
"""


class SmolGaiaAgent:
    """Premium agent optimized for maximum accuracy on GAIA Level 1."""

    def __init__(self):
        """Build the model and the CodeAgent, with a fallback for older
        smolagents versions that do not accept ``system_prompt``."""
        print("Initializing Premium SmolGaiaAgent...")

        # Use the most capable model available.
        self.model = OpenAIModel(
            model_id="gpt-4.1",
            api_key=os.getenv("OPENAI_API_KEY"),
        )
        # Alternative: another provider via InferenceClientModel, e.g.
        # self.model = InferenceClientModel(
        #     model_id="anthropic/claude-3-5-sonnet",
        #     api_key=os.getenv("ANTHROPIC_API_KEY"),
        # )

        # More steps = better accuracy (but slower). Older smolagents
        # releases raise TypeError on the `system_prompt` kwarg; in that
        # case we prepend the prompt to every task instead (see __call__).
        try:
            self.agent = CodeAgent(
                tools=[],
                add_base_tools=True,
                model=self.model,
                max_steps=12,  # increased from 6 to 12 for thorough reasoning
                system_prompt=GAIA_SYSTEM_PROMPT,
            )
            print("Agent initialized with system_prompt parameter")
            self.use_task_prefix = False
        except TypeError as e:
            print(f"system_prompt not supported, using task prefix: {e}")
            self.agent = CodeAgent(
                tools=[],
                add_base_tools=True,
                model=self.model,
                max_steps=12,
            )
            self.use_task_prefix = True

    def __call__(self, question: str) -> str:
        """Run the CodeAgent on one question and return the cleaned answer.

        Returns the literal string "Error processing question" on failure so
        the submission loop never crashes on a single bad question.
        """
        print(f"[Premium Agent] Question: {question[:80]}...")

        if self.use_task_prefix:
            # Fallback path: inline the system prompt into the task itself.
            task = f"{GAIA_SYSTEM_PROMPT}\n\nTask: {question}"
        else:
            task = question

        try:
            answer = str(self.agent.run(task)).strip()
            answer = self.aggressive_clean_answer(answer)
            print(f"[Premium Agent] Final Answer: {answer}")
            return answer
        except Exception as e:
            print(f"[Premium Agent] Error: {e}")
            traceback.print_exc()
            return "Error processing question"

    def aggressive_clean_answer(self, answer: str) -> str:
        """Aggressively strip boilerplate so only the bare answer remains.

        GAIA scoring does exact matching, so prefixes like "Answer:",
        surrounding quotes, trailing periods and leading articles all cost
        points and are removed here.
        """
        original = answer

        # Remove common prefixes (case insensitive).
        prefixes_to_remove = [
            "final answer:",
            "the final answer is:",
            "answer:",
            "the answer is:",
            "the answer is",
            "result:",
            "solution:",
            "output:",
        ]
        answer_lower = answer.lower()
        for prefix in prefixes_to_remove:
            if answer_lower.startswith(prefix):
                answer = answer[len(prefix):].strip()
                answer_lower = answer.lower()

        # Remove surrounding quotes.
        if (answer.startswith('"') and answer.endswith('"')) or \
           (answer.startswith("'") and answer.endswith("'")):
            answer = answer[1:-1].strip()

        # If the answer embeds "is:" / "are:", keep only what follows.
        if " is:" in answer.lower():
            parts = answer.split("is:")
            if len(parts) > 1:
                answer = parts[-1].strip()
        if " are:" in answer.lower():
            parts = answer.split("are:")
            if len(parts) > 1:
                answer = parts[-1].strip()

        # Remove a trailing period unless it terminates a decimal number.
        # Length guard: a bare "." must not raise IndexError on answer[-2].
        if answer.endswith('.') and (len(answer) < 2 or not answer[-2].isdigit()):
            answer = answer[:-1].strip()

        # Drop a leading "The " when it precedes a likely proper noun.
        if answer.startswith("The ") and len(answer) > 4:
            words = answer.split()
            next_word = words[1] if len(words) > 1 else ""
            if next_word and next_word[0].isupper():
                answer = answer[4:].strip()

        # Drop leading articles "a " / "an ".
        if answer.lower().startswith("a "):
            answer = answer[2:].strip()
        elif answer.lower().startswith("an "):
            answer = answer[3:].strip()

        print(f"[Cleaning] Original: '{original}' → Cleaned: '{answer}'")
        return answer


def _encouragement(score) -> str:
    """Pick the cheer line for the final status message.

    Tolerates a non-numeric score (the API default is the string 'N/A'),
    which previously crashed the success path with a ValueError.
    """
    try:
        value = float(score)
    except (TypeError, ValueError):
        return "👍 Good job!"
    if value >= 80:
        return "🏆 EXCELLENT!"
    if value >= 50:
        return "👍 Good job!"
    return "💪 Keep improving!"


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the Premium Agent on each, submit answers.

    Returns a ``(status_message, results_dataframe)`` pair for the Gradio
    outputs; the dataframe is ``None`` when processing never started.
    """
    space_id = os.getenv("SPACE_ID")

    if profile is None:
        return "Please Login to Hugging Face with the button.", None
    try:
        username = profile.username
        print(f"User logged in: {username}")
    except AttributeError:
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    print("\n" + "=" * 70)
    print("INITIALIZING PREMIUM AGENT")
    print("=" * 70)
    try:
        agent = SmolGaiaAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        traceback.print_exc()
        return f"Error initializing agent: {e}", None
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 2. Fetch Questions
    print(f"\nFetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
        print(f"✓ Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # 3. Run Agent with detailed progress tracking
    results_log = []
    answers_payload = []
    total = len(questions_data)
    print("\n" + "=" * 70)
    print(f"PROCESSING {total} QUESTIONS")
    print("=" * 70 + "\n")

    for idx, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print("⚠ Skipping item with missing task_id or question")
            continue

        print(f"\n{'=' * 70}")
        print(f"QUESTION {idx}/{total}")
        print(f"Task ID: {task_id}")
        print(f"Question: {question_text[:100]}...")
        print('=' * 70)

        try:
            submitted_answer = agent(question_text)
            answers_payload.append(
                {"task_id": task_id, "submitted_answer": submitted_answer}
            )
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer,
            })
            print(f"✓ Answer recorded: {submitted_answer}")
        except Exception as e:
            print(f"✗ Error processing question: {e}")
            traceback.print_exc()
            # Record the failure so the results table stays complete.
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
            })

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Submit
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    print("\n" + "=" * 70)
    print(f"SUBMITTING {len(answers_payload)} ANSWERS")
    print("=" * 70)
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        score = result_data.get('score', 'N/A')
        correct = result_data.get('correct_count', '?')
        total_attempted = result_data.get('total_attempted', '?')
        final_status = (
            f"🎉 Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {score}% ({correct}/{total_attempted} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}\n\n"
            f"{_encouragement(score)}"
        )
        print(f"\n✓ Submission successful! Score: {score}%")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except Exception as e:
        print(f"✗ Submission error: {e}")
        results_df = pd.DataFrame(results_log)
        return f"Submission Failed: {e}", results_df


# --- Build Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# 🏆 Premium Agent - Optimized for Maximum Accuracy")
    gr.Markdown(
        """
        **Current Configuration:**
        - 🧠 Model: gpt-4.1 via the OpenAI API (most capable)
        - 🔄 Max Steps: 12 (thorough reasoning)
        - 🧹 Enhanced answer cleaning
        - 📊 Detailed progress logging

        **Target Performance:**
        - ⏱️ Time: ~20-25 minutes for 20 questions
        - 🎯 Target Score: 60-80% (realistic for Level 1)
        - 🏆 Stretch Goal: 80%+ with optimal configuration

        **To Reach 100%:**
        Getting 100% on GAIA Level 1 is extremely difficult. The benchmark shows:
        - GPT-4 achieves ~70-80%
        - Claude 3.5 achieves ~75-85%
        - Human experts achieve ~90-95%

        For the best possible score:
        1. ✅ Use this premium configuration (12 steps, top-tier model)
        2. 🔍 Manually review failed questions and add custom logic
        3. 🛠️ Create specialized tools for specific question types
        4. 🧪 Test and iterate on difficult questions
        """
    )

    gr.LoginButton()
    run_button = gr.Button("🚀 Run Premium Evaluation & Submit")
    status_output = gr.Textbox(
        label="Run Status / Submission Result", lines=7, interactive=False
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # The gr.OAuthProfile annotation on run_and_submit_all is auto-injected
    # by Gradio's OAuth support, so no `inputs=` is needed here.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )


if __name__ == "__main__":
    print("\n" + "=" * 70)
    print("PREMIUM AGENT STARTING")
    print("=" * 70)
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")
    if space_host:
        print(f"✓ Runtime URL: https://{space_host}.hf.space")
    if space_id:
        print(f"✓ Repo URL: https://huggingface.co/spaces/{space_id}/tree/main")
    print("=" * 70 + "\n")
    demo.launch(debug=True, share=False)