File size: 10,580 Bytes
93b72dc
 
 
 
 
 
 
 
d1dcd56
93b72dc
d1dcd56
 
 
93b72dc
 
 
 
 
 
 
d1dcd56
 
 
93b72dc
 
 
d1dcd56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93b72dc
 
 
d1dcd56
 
 
 
93b72dc
 
 
 
d1dcd56
93b72dc
 
 
d1dcd56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93b72dc
 
 
d1dcd56
 
 
 
 
 
93b72dc
 
d1dcd56
 
 
 
 
 
 
 
 
 
 
 
 
 
93b72dc
d1dcd56
93b72dc
 
d1dcd56
 
 
93b72dc
 
 
d1dcd56
93b72dc
d1dcd56
93b72dc
 
 
 
d1dcd56
93b72dc
 
 
 
 
 
 
 
 
 
 
d1dcd56
93b72dc
 
 
 
d1dcd56
93b72dc
d1dcd56
93b72dc
 
 
 
 
 
 
d1dcd56
93b72dc
 
 
d1dcd56
 
 
93b72dc
 
 
 
 
 
d1dcd56
93b72dc
 
 
d1dcd56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93b72dc
d1dcd56
93b72dc
 
 
 
 
 
 
d1dcd56
93b72dc
 
 
 
 
e95a92e
d1dcd56
93b72dc
 
 
d1dcd56
93b72dc
 
d1dcd56
93b72dc
 
 
 
d1dcd56
93b72dc
d1dcd56
93b72dc
 
 
d1dcd56
93b72dc
 
d1dcd56
 
93b72dc
d1dcd56
93b72dc
d1dcd56
93b72dc
 
d1dcd56
93b72dc
 
d1dcd56
93b72dc
d1dcd56
 
 
 
93b72dc
d1dcd56
93b72dc
d1dcd56
 
 
93b72dc
 
d1dcd56
93b72dc
d1dcd56
 
93b72dc
 
d1dcd56
 
93b72dc
d1dcd56
 
93b72dc
 
d1dcd56
 
93b72dc
d1dcd56
 
93b72dc
 
 
d1dcd56
 
 
93b72dc
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
import os
import gradio as gr
import requests
import pandas as pd
import tempfile
import json
import logging
from typing import Optional
from dotenv import load_dotenv

load_dotenv()

from agent_enhanced import GAIAAgent, is_ollama_available, is_production

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def fetch_questions(api_url: str = DEFAULT_API_URL) -> list:
    """Retrieve the full question list from the GAIA scoring API.

    Retries up to three times on any failure; returns an empty list when
    every attempt fails. Also echoes each question (task id, truncated
    text, attached file if any) to stdout for quick inspection.
    """
    for attempt in range(3):
        try:
            resp = requests.get(f"{api_url}/questions", timeout=30)
            resp.raise_for_status()
            questions = resp.json()

            # Debug dump of the whole question set.
            separator = "=" * 80
            print("\n" + separator)
            print("ALL QUESTIONS WITH TASK IDs:")
            print(separator)
            for idx, item in enumerate(questions, 1):
                tid = item.get("task_id", "N/A")
                text = item.get("question", "N/A")
                attachment = item.get("file_name", "")
                print(f"\n[{idx}] Task ID: {tid}")
                suffix = "..." if len(text) > 200 else ""
                print(f"    Question: {text[:200]}{suffix}")
                if attachment:
                    print(f"    File: {attachment}")
            print("\n" + separator)
            print(f"Total questions: {len(questions)}")
            print(separator + "\n")

            return questions
        except Exception as e:
            logger.warning(f"Attempt {attempt + 1} failed: {e}")
    return []


def fetch_random_question(api_url: str = DEFAULT_API_URL) -> dict:
    """Fetch one random question; returns {} after three failed attempts."""
    attempt = 0
    while attempt < 3:
        try:
            resp = requests.get(f"{api_url}/random-question", timeout=30)
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            logger.warning(f"Attempt {attempt + 1} failed: {e}")
        attempt += 1
    return {}


def fetch_file(task_id: str, api_url: str = DEFAULT_API_URL) -> Optional[str]:
    """Download the attachment for *task_id* into a fresh temp directory.

    Returns:
        The local file path on success; None when the task has no file
        (HTTP 404) or the download fails for any reason.
    """
    try:
        response = requests.get(f"{api_url}/files/{task_id}", timeout=30)
        if response.status_code == 200:
            content_disposition = response.headers.get('content-disposition', '')
            filename = f"task_{task_id}_file"
            if 'filename=' in content_disposition:
                # Keep only the filename token: drop trailing header
                # parameters ("; charset=...") and surrounding quotes.
                raw = content_disposition.split('filename=')[1].split(';')[0].strip().strip('"')
                # Sanitize: the header is server-supplied, so strip any
                # directory components to prevent path traversal out of
                # the temp dir.
                raw = os.path.basename(raw.replace('\\', '/'))
                if raw:
                    filename = raw

            temp_dir = tempfile.mkdtemp()
            file_path = os.path.join(temp_dir, filename)

            with open(file_path, 'wb') as f:
                f.write(response.content)

            logger.info(f"Downloaded: {file_path}")
            return file_path
        elif response.status_code == 404:
            # 404 means the task simply has no attachment.
            return None
    except Exception as e:
        logger.error(f"File fetch failed: {e}")
    return None


def submit_answers(username: str, agent_code: str, answers: list, api_url: str = DEFAULT_API_URL) -> dict:
    """POST the answer set to the scoring endpoint and return its JSON reply.

    Raises requests.HTTPError on a non-2xx response.
    """
    body = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    resp = requests.post(f"{api_url}/submit", json=body, timeout=60)
    resp.raise_for_status()
    return resp.json()


def get_env_status() -> str:
    """Describe which LLM backend the agent will use, as a Markdown string."""
    if is_production():
        return "☁️ **Production Mode** (HuggingFace Spaces) - Using OpenAI GPT-4o"
    if is_ollama_available():
        return "🏠 **Local Mode** - Using Ollama"
    if os.environ.get("OPENAI_API_KEY"):
        return "☁️ **Local + OpenAI** - Using OpenAI GPT-4o"
    return "⚠️ **No Backend** - Set OPENAI_API_KEY or start Ollama"


def run_agent_on_questions(progress=gr.Progress()):
    """Run the agent over every benchmark question.

    Returns:
        (DataFrame, list): a per-question results table and the answers
        payload shaped for the /submit endpoint; or (error string, None)
        when initialization or the question fetch fails.
    """
    try:
        progress(0, desc="Initializing agent...")

        agent = GAIAAgent()

        progress(0.05, desc="Fetching questions...")
        questions = fetch_questions()

        if not questions:
            return "Error: Failed to fetch questions.", None

        total = len(questions)
        results = []
        answers_for_submission = []

        for i, q in enumerate(questions):
            progress((i + 1) / total, desc=f"Question {i+1}/{total}...")

            task_id = q.get("task_id", "")
            question_text = q.get("question", "")

            # Download the attachment only when the question declares one.
            file_path = None
            if q.get("file_name"):
                file_path = fetch_file(task_id)

            try:
                answer = agent.run(question_text, task_id, file_path)
            except Exception as e:
                logger.error(f"Error on question {i+1}: {e}")
                answer = f"Error: {str(e)}"

            results.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": answer,
                "Status": "βœ“" if answer and not answer.startswith("Error:") and answer != "Unable to determine answer" else "βœ—"
            })

            answers_for_submission.append({
                "task_id": task_id,
                "submitted_answer": answer
            })

            # Best-effort cleanup of the downloaded file and its temp dir;
            # a failed removal must not abort the benchmark run, but only
            # filesystem errors are tolerated (no bare except).
            if file_path and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    os.rmdir(os.path.dirname(file_path))
                except OSError:
                    pass

        df = pd.DataFrame(results)
        progress(1.0, desc="Complete!")
        return df, answers_for_submission

    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", None


def test_single_question():
    """Fetch one random question, run the agent on it, and report the result.

    Returns:
        (question, answer, task_id, status) strings for the Gradio UI;
        on failure the first element carries the error message and the
        rest are empty.
    """
    try:
        agent = GAIAAgent()
        question_data = fetch_random_question()

        if not question_data:
            return "Error: Failed to fetch question.", "", "", ""

        task_id = question_data.get("task_id", "")
        question_text = question_data.get("question", "")

        # Download the attachment only when the question declares one.
        file_path = None
        if question_data.get("file_name"):
            file_path = fetch_file(task_id)

        answer = agent.run(question_text, task_id, file_path)

        # Best-effort cleanup; only filesystem errors are tolerated so a
        # failed delete cannot mask an unrelated crash (was a bare except).
        if file_path and os.path.exists(file_path):
            try:
                os.remove(file_path)
                os.rmdir(os.path.dirname(file_path))
            except OSError:
                pass

        status = "βœ“ Valid" if answer and not answer.startswith("Error") else "⚠️ Check answer"
        return question_text, answer, task_id, status

    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", "", "", ""


def submit_to_leaderboard(username: str, space_url: str, answers_json: str):
    """Validate inputs and submit the answer set to the course leaderboard.

    Returns a Markdown string: either a validation message or the
    formatted submission results.
    """
    if not username or not space_url or not answers_json:
        return "Please fill in all fields and run the agent first."

    try:
        # The answers may arrive as a JSON string (from the textbox) or as
        # an already-parsed list (from state).
        answers = json.loads(answers_json) if isinstance(answers_json, str) else answers_json

        if not isinstance(answers, list) or len(answers) == 0:
            return "Error: Run the benchmark first."

        # The API expects a link to the Space's code tree.
        if not space_url.endswith("/tree/main"):
            space_url = space_url.rstrip("/") + "/tree/main"

        result = submit_answers(username, space_url, answers)
        logger.info("Submission response: %s", result)  # was a stray debug print
        score = result.get("score", 0)
        correct = result.get("correct_count", 0)
        total = result.get("total_attempted", 0)

        # NOTE(review): this assumes "score" is a 0-1 fraction (hence the
        # :.1% format and the 0.3 threshold). If the API returns a 0-100
        # percentage, both the display and the certificate check are off
        # by 100x -- confirm against the scoring API's response schema.
        cert_msg = "πŸ† **Congratulations!** Score above 30% - Certificate earned!" if score > 0.3 else "❌ Need >30% for certificate."

        return f"""
## Submission Results

**Score:** {score:.1%}
**Correct:** {correct}/{total}

{cert_msg}

[View Leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard)
"""
    except Exception as e:
        logger.error(f"Submission error: {e}")
        return f"Error: {str(e)}"


# ============ GRADIO APP ============
# Three-tab UI: single-question smoke test, full benchmark run, and
# leaderboard submission. Answers flow from the benchmark tab into the
# submit tab via answers_state -> answers_in.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# πŸ€– GAIA Benchmark Agent

**Tools:** πŸ” Web Search | πŸ“š Wikipedia | 🐍 Python | πŸ“„ Files | πŸ”’ Calculator | 🌐 Webpages | πŸ‘οΈ Vision (OpenAI)
""")
    
    # Banner showing which LLM backend is active (evaluated once at startup).
    env_status = gr.Markdown(get_env_status())
    
    with gr.Tabs():
        with gr.TabItem("πŸ§ͺ Test Single"):
            test_btn = gr.Button("Fetch & Solve Random Question", variant="primary")
            test_q = gr.Textbox(label="Question", lines=4, interactive=False)
            test_a = gr.Textbox(label="Answer", lines=2, interactive=False)
            test_id = gr.Textbox(label="Task ID", interactive=False)
            test_status = gr.Textbox(label="Status", interactive=False)
            
            test_btn.click(test_single_question, outputs=[test_q, test_a, test_id, test_status])
        
        with gr.TabItem("πŸš€ Full Benchmark"):
            run_btn = gr.Button("Run on All Questions", variant="primary")
            results_df = gr.Dataframe(label="Results")
            # Holds the raw answers list between the run and the submit tab.
            answers_state = gr.State()
            
            run_btn.click(run_agent_on_questions, outputs=[results_df, answers_state])
        
        with gr.TabItem("πŸ“€ Submit"):
            gr.Markdown("### Submit to Leaderboard")
            
            with gr.Row():
                username_in = gr.Textbox(label="HF Username", placeholder="your-username")
                space_url_in = gr.Textbox(label="Space URL", placeholder="https://huggingface.co/spaces/you/space")
            
            answers_in = gr.Textbox(label="Answers JSON (auto-filled)", lines=8)
            submit_btn = gr.Button("Submit", variant="primary")
            submit_result = gr.Markdown()
            
            def format_answers(a):
                """Render the stored answers list as pretty JSON for the textbox."""
                return json.dumps(a, indent=2) if a else ""
            
            # Mirror the benchmark's answers into the editable JSON textbox.
            answers_state.change(format_answers, inputs=[answers_state], outputs=[answers_in])
            submit_btn.click(submit_to_leaderboard, inputs=[username_in, space_url_in, answers_in], outputs=[submit_result])
    
    gr.Markdown("""
---
**Setup:**
- Local: `ollama serve` + `ollama pull qwen2.5:32b`  
- Production: Set `OPENAI_API_KEY` in `.env` or HF Secrets
""")

# Bind to all interfaces so the app is reachable inside HF Spaces / Docker.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)