# Final_Assignment_Template
#
# Build error
#
# File size: 9,844 Bytes

import os
import re
import json
import time
import requests
import gradio as gr

# ── Constants ──────────────────────────────────────────────────────────────────
# Base URL of the scoring service; this file requests /questions,
# /files/<task_id> and /submit beneath it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Chat-completions endpoint that call_perplexity() POSTs to.
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"


# ── Helper Tools ───────────────────────────────────────────────────────────────
def download_file(task_id: str) -> str:
    """Fetch the file for a task from the GAIA API and return it as text.

    Sends a GET to ``{DEFAULT_API_URL}/files/{task_id}`` with a 30-second
    timeout.

    Args:
        task_id: Identifier interpolated into the file URL.

    Returns:
        The first 10,000 characters of the response body text on success.
        On any exception (network failure, error status, ...) a
        ``"[File error: ...]"`` placeholder string is returned instead of
        raising, so callers can detect failure by that prefix.
    """
    max_chars = 10000
    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()
        body_text = response.text
        return body_text[:max_chars]
    except Exception as err:
        return f"[File error: {err}]"


# ── Direct Perplexity Call ─────────────────────────────────────────────────────
def call_perplexity(system_prompt: str, user_message: str, api_key: str) -> str:
    """Send one chat-completion request to Perplexity and return the reply text.

    Failures are reported in-band rather than raised, so the caller can log
    them alongside normal answers.

    Args:
        system_prompt: Content of the "system" message.
        user_message: Content of the "user" message.
        api_key: Bearer token placed in the Authorization header.

    Returns:
        The assistant message content on success.
        ``"HTTP_ERROR: <status>"`` when the server replies with an error
        status, or ``"ERROR: <details>"`` for any other failure (network
        error, unexpected JSON shape, missing text content).
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "sonar-pro",  # Updated model name (sonar-large was deprecated Feb 2025)
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.2,
        "max_tokens": 500,
    }

    try:
        resp = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60)
        resp.raise_for_status()
        content = resp.json()["choices"][0]["message"]["content"]
        if not isinstance(content, str):
            # e.g. a JSON null: report it clearly instead of failing on the slice below.
            print("[DEBUG] Perplexity response contained no text content")
            return "ERROR: response contained no text content"
        print(f"[DEBUG] Raw Perplexity response: {content[:200]}")  # Debug log
        return content
    except requests.HTTPError as e:
        response = e.response
        # Compare against None explicitly: a Response object is falsy for
        # error statuses, so a plain truthiness test would always fail here.
        if response is None:
            print(f"[DEBUG] HTTP Error: {str(e)[:200]}")
            return f"ERROR: {e}"
        print(f"[DEBUG] HTTP Error: {response.status_code} - {response.text[:200]}")
        return f"HTTP_ERROR: {response.status_code}"
    except Exception as e:
        print(f"[DEBUG] Exception: {str(e)[:200]}")
        return f"ERROR: {e}"


# ── Answer Cleaner ─────────────────────────────────────────────────────────────
def clean_answer(raw: str) -> str:
    """Reduce a model reply to the bare answer string.

    Cleaning steps, in order:

    1. Replies starting with ``"ERROR:"`` or ``"HTTP_ERROR:"`` are returned
       unchanged (apart from surrounding whitespace) so failures stay
       visible for debugging.
    2. Markdown bold markers (``**text**``) are removed. This runs first so
       that bolded labels such as ``**Final Answer:**`` are recognised by
       the next step.
    3. One leading label ("Final Answer:", "Answer:", "The answer is",
       "Result:", "Based on") is stripped, case-insensitively.
    4. A multi-line reply is cut down to its first line when that line is
       shorter than 150 characters.
    5. One pair of matching surrounding quotes is removed.

    Args:
        raw: Reply text. Non-string values are converted with ``str()``;
            ``None`` is treated as an empty reply.

    Returns:
        The cleaned answer (possibly an empty string).
    """
    # Coerce once, up front, so the debug log below can always slice it.
    original = "" if raw is None else str(raw)
    text = original.strip()

    # Don't clean error messages - return them as-is for debugging
    if text.startswith(("ERROR:", "HTTP_ERROR:")):
        return text

    # Remove markdown bold before looking for labels or quotes
    text = re.sub(r"\*\*(.*?)\*\*", r"\1", text).strip()

    # Lower-cased labels; longer variants come before their own prefixes
    # ("the answer is:" before "the answer is") so the colon is consumed too.
    prefixes = (
        "final answer:",
        "answer:",
        "the answer is:",
        "the answer is",
        "result:",
        "based on",
    )
    lowered = text.lower()
    for prefix in prefixes:
        if lowered.startswith(prefix):
            text = text[len(prefix):].strip()
            break

    # Take first line if multi-line and short enough
    if "\n" in text:
        first_line = text.split("\n")[0].strip()
        if len(first_line) < 150:
            text = first_line

    # Remove one pair of matching surrounding quotes
    if len(text) >= 2 and text[0] in ('"', "'") and text[0] == text[-1]:
        text = text[1:-1].strip()

    # Debug log the cleaning
    if text != original:
        print(f"[DEBUG] Cleaned '{original[:100]}...' → '{text[:100]}'")

    return text


# ── System Prompt ──────────────────────────────────────────────────────────────
# Sent as the "system" message for every question in run_and_submit_all().
# It asks the model for a bare answer; clean_answer() then strips any
# label or formatting the model adds anyway.
SYSTEM_PROMPT = """Answer the question with ONLY the final answer. No explanation.
RULES:
- NO periods at end ("right" NOT "right.")
- Numbers: digits only ("42")
- Country names: full name ("Malta" NOT "MLT")
- Lists: comma-separated
EXAMPLES:
Q: "What year was Mona Lisa painted?" → 1503
Q: "Opposite of left" → right
"""


# ── Main Runner ────────────────────────────────────────────────────────────────
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every question from the scoring API with Perplexity and submit.

    Flow: check login and API key, fetch the question list, make one
    smoke-test call to Perplexity, answer each question (downloading the
    task file when the question text mentions one), then POST all answers
    to ``/submit`` with up to three attempts.

    Args:
        profile: Profile of the logged-in user (its ``username`` is
            submitted); falsy when nobody is logged in.

    Returns:
        A ``(log_text, results)`` tuple. ``log_text`` is the newline-joined
        run log. ``results`` is a list of row dicts keyed ``"#"``,
        ``"Task ID"``, ``"Question"`` and ``"Answer"``, or ``None`` when the
        run stops before any question is processed.
    """
    if not profile:
        return "❌ Please log in first.", None

    username = profile.username
    api_key = os.environ.get("PERPLEXITY_API_KEY")

    if not api_key:
        return "❌ PERPLEXITY_API_KEY not found in Space secrets!", None

    # Never echo any part of the secret: stdout is logged and the log text
    # built below is returned to the UI.
    print("[DEBUG] PERPLEXITY_API_KEY is set")

    space_id = os.environ.get("SPACE_ID", "")
    agent_code_url = (
        f"https://huggingface.co/spaces/{space_id}/tree/main"
        if space_id
        else f"https://huggingface.co/spaces/{username}/my-gaia-agent/tree/main"
    )

    log = [f"👤 User: {username}", "📥 Fetching questions..."]

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"❌ Failed to fetch questions: {e}", None

    if not questions:
        # Nothing to answer; avoid a pointless API call and empty submission.
        return "❌ No questions returned by the scoring API.", None

    total = len(questions)
    log.append(f"✅ {total} questions loaded")

    log.append("🤖 Testing Perplexity API...")

    # Test API with simple question first
    test_answer = call_perplexity("You are helpful.", "What is 2+2?", api_key)
    log.append(f"🧪 Test call result: {test_answer[:100]}")

    log.append("─" * 40)

    file_keywords = ("file", "image", "attached", "spreadsheet", "document", "excel")
    answers = []
    results_log = []

    for index, q in enumerate(questions, start=1):
        task_id = q.get("task_id", "")
        question_text = q.get("question", "")
        log.append(f"[{index}/{total}] {question_text[:65]}...")

        # Rate limit: pause between consecutive Perplexity calls
        if index > 1:
            time.sleep(4)

        # Heuristic: fetch the task file only when the question mentions one
        lowered_question = question_text.lower()
        if any(word in lowered_question for word in file_keywords):
            file_content = download_file(task_id)
            if not file_content.startswith("[File error"):
                question_text = f"{question_text}\n\nFile content:\n{file_content[:2000]}"

        # Call Perplexity
        try:
            user_prompt = f"Question: {question_text}\n\nAnswer with ONLY the answer, nothing else."
            raw_answer = call_perplexity(SYSTEM_PROMPT, user_prompt, api_key)
            final_answer = clean_answer(raw_answer)

            log.append(f"  📝 Raw: {raw_answer[:80]}")
            log.append(f"  ✅ Final: {final_answer[:80] if final_answer else '(empty after cleaning)'}")

        except Exception as e:
            final_answer = f"EXCEPTION: {str(e)[:80]}"
            log.append(f"  ❌ Error: {final_answer}")

        answers.append({"task_id": task_id, "submitted_answer": final_answer})
        results_log.append({
            "#": index,
            "Task ID": task_id[:8] + "...",
            "Question": question_text[:65] + "..." if len(question_text) > 65 else question_text,
            "Answer": final_answer or "(empty)",
        })

    # Count only real answers: every in-band failure marker is excluded,
    # including the "HTTP_ERROR" strings call_perplexity() can return.
    failure_prefixes = ("ERROR", "HTTP_ERROR", "EXCEPTION")
    answered = sum(
        1
        for a in answers
        if a["submitted_answer"] and not a["submitted_answer"].startswith(failure_prefixes)
    )
    log.append("─" * 40)
    log.append(f"📊 Answered: {answered}/{total}")

    # Submit
    payload = {
        "username": username,
        "agent_code": agent_code_url,
        "answers": answers,
    }

    for attempt in range(3):
        try:
            log.append(f"📤 Submitting ({attempt+1}/3)...")
            sub = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=60)
            sub.raise_for_status()
            data = sub.json()
            score = data.get("score", "N/A")
            correct = data.get("correct_count", "?")
            log += [
                "─" * 40,
                "✅ SUBMITTED!",
                f"📊 Score: {score}%",
                f"✔️  Correct: {correct}/{total}",
                f"🔗 {agent_code_url}",
                "─" * 40,
                "🏆 https://huggingface.co/spaces/agents-course/Students_leaderboard",
            ]
            break
        except Exception as e:
            log.append(f"⚠️ Failed: {str(e)[:60]}")
            # Back off before retrying, but not after the final attempt
            if attempt < 2:
                time.sleep(5)

    return "\n".join(log), results_log


# ── Gradio UI ──────────────────────────────────────────────────────────────────
# Build the single-page UI: login button, run button, log box and results table.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    # Label matches the model actually requested by call_perplexity() ("sonar-pro").
    gr.Markdown("""
    # 🤖 HF Agents Course — Unit 4
    **Perplexity Sonar Pro (Direct API with Debug Logging)**
    1. Log in with Hugging Face
    2. Click Run & Submit
    3. Check the logs to see what Perplexity is returning
    """)

    gr.LoginButton()
    run_btn = gr.Button("🚀 Run Agent & Submit All Answers", variant="primary", size="lg")
    status_box = gr.Textbox(label="Live Log (with debug info)", lines=25, interactive=False)
    results_table = gr.DataFrame(label="Results", headers=["#", "Task ID", "Question", "Answer"])

    # No explicit inputs are wired; presumably Gradio supplies the `profile`
    # argument from the function's OAuthProfile annotation — TODO confirm.
    run_btn.click(fn=run_and_submit_all, outputs=[status_box, results_table])

    gr.Markdown("**Debug version** - Shows raw Perplexity responses")

demo.launch()