File size: 5,216 Bytes
10e9b7d
c62c685
c91abc5
 
c62c685
e050799
3db6293
cb4241f
c62c685
 
d634beb
c62c685
 
 
 
 
 
 
 
 
 
 
d634beb
 
e0d5690
d634beb
 
 
 
c2690f9
d634beb
 
 
c62c685
 
 
 
 
 
 
 
 
d634beb
c62c685
 
d634beb
6aa0515
d634beb
c62c685
d634beb
6aa0515
c62c685
 
d634beb
 
e0e5e7c
d634beb
 
 
 
 
 
 
 
 
 
 
 
 
 
c62c685
d634beb
c62c685
d634beb
e0e5e7c
c62c685
bba9209
c62c685
bba9209
c62c685
 
 
 
6aa0515
c62c685
 
 
6aa0515
c62c685
 
 
 
 
 
 
d7ac30f
c62c685
e0e5e7c
d634beb
 
 
 
 
c62c685
 
 
 
 
 
 
 
 
 
 
 
 
e0e5e7c
d634beb
c62c685
 
d7ac30f
c91abc5
c62c685
1a20082
d634beb
c62c685
 
 
 
d634beb
 
 
 
 
 
 
 
6aa0515
 
1a20082
c62c685
 
 
 
 
 
 
 
 
c91abc5
c62c685
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import gradio as gr
import requests
import pandas as pd
from agent import GaiaAgent

# Base URL of the scoring service. The code in this file appends
# "/questions", "/files/{task_id}" and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

def _truncate(value, limit: int = 60) -> str:
    """Return ``str(value)`` cut to *limit* chars, adding "..." only if it was cut.

    ``None`` is rendered as an empty string so the results table never fails
    on a missing field or a non-string agent answer.
    """
    text = "" if value is None else str(value)
    return text[:limit] + "..." if len(text) > limit else text


def _fetch_task_file(task_id, file_name: str) -> str:
    """Download the attachment of *task_id* and return a text rendering of it.

    Returns the first 5000 characters when the payload decodes as text, a
    ``"[Binary file: ...]"`` placeholder when it does not, and ``""`` when the
    download fails or the server answers with a non-200 status (best effort:
    a missing file must not abort the whole evaluation run).
    """
    try:
        print(f"  fetching file: {file_name}...")
        file_resp = requests.get(
            f"{DEFAULT_API_URL}/files/{task_id}",
            timeout=30,
        )
        if file_resp.status_code != 200:
            print(f"  file fetch returned {file_resp.status_code} (skipping)")
            return ""
        raw = file_resp.content
    except requests.RequestException as e:
        print(f"  error fetching file: {e}")
        return ""

    # `Response.text` decodes with replacement characters and never raises,
    # so it cannot be used to tell text from binary. Decode strictly instead,
    # using the encoding declared by the server and falling back to UTF-8.
    try:
        text = raw.decode(file_resp.encoding or "utf-8")
    except (UnicodeDecodeError, LookupError):
        print("  loaded binary file")
        return f"[Binary file: {file_name}, {len(raw)} bytes]"

    file_content = text[:5000]
    print(f"  loaded {len(file_content)} chars from file")
    return file_content


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch questions from GAIA API, run agent, submit answers

    Args:
        profile: The logged-in user's profile, or ``None`` when nobody is
            logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per answered task ("Task ID",
        "Question", "Answer"), or ``None`` when the run stopped before any
        question was processed (no login, question fetch failure, agent
        initialisation failure).

    Errors from the network or from the agent are caught, printed and
    reported through ``status_message`` rather than raised.
    """

    # Check login
    if not profile:
        return "Please login to Hugging Face first.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID")

    print(f"\n[run_and_submit_all] starting for user: {username}")

    # Fetch questions from API
    print("[run_and_submit_all] fetching questions from API...")
    try:
        questions_resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        questions_resp.raise_for_status()
        questions = questions_resp.json()
    except Exception as e:
        error_msg = f"Error fetching questions: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    # An empty or malformed payload would otherwise lead to an empty submission.
    if not isinstance(questions, list) or not questions:
        error_msg = "Error fetching questions: API returned no questions"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None
    total = len(questions)
    print(f"[run_and_submit_all] ✓ fetched {total} questions")

    # Initialize agent
    print("[run_and_submit_all] initializing agent...")
    try:
        agent = GaiaAgent()
    except Exception as e:
        error_msg = f"Error initializing agent: {str(e)[:200]}"
        print(f"[run_and_submit_all] ✗ {error_msg}")
        return error_msg, None

    # Run agent on each question
    results_log = []
    answers_payload = []

    for i, item in enumerate(questions, start=1):
        if not isinstance(item, dict):
            print(f"\n[run_and_submit_all] [{i}/{total}] skipping malformed item")
            continue

        task_id = item.get("task_id")
        question = item.get("question")
        file_name = item.get("file_name") or ""

        print(f"\n[run_and_submit_all] [{i}/{total}] task_id={task_id}")
        # Without both fields there is nothing to ask or to submit against.
        if not task_id or question is None:
            print("  skipping: missing task_id or question")
            continue
        print(f"  question: {str(question)[:80]}...")
        print(f"  file: {file_name if file_name else '(none)'}")

        # Try to fetch file if it exists
        file_content = _fetch_task_file(task_id, file_name) if file_name else ""

        # Run agent
        try:
            answer = agent(question, file_content=file_content)
        except Exception as e:
            print(f"  error running agent: {e}")
            answer = "I am unable to answer"

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": _truncate(task_id, 8),
            "Question": _truncate(question),
            "Answer": _truncate(answer),
        })

    if not answers_payload:
        error_msg = "❌ Submission failed: no answers were produced"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)

    # Submit answers
    print(f"\n[run_and_submit_all] submitting {len(answers_payload)} answers...")
    submission_data = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local",
        "answers": answers_payload,
    }

    try:
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=submission_data,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()

        status_msg = (
            f"✅ Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', 'No message')}"
        )
        print(status_msg)
        return status_msg, pd.DataFrame(results_log)

    except Exception as e:
        error_msg = f"❌ Submission failed: {str(e)[:200]}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)


# Gradio UI
# Builds the page as a Blocks layout bound to `demo`: a title, usage notes,
# a login button, a run button, and two output widgets for the run result.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent — Mistral")
    gr.Markdown("""
    **How it works:**
    1. Click "Login with Hugging Face"
    2. Click "Run Evaluation"
    3. Agent processes all 20 questions
    4. See your score instantly!
    
    **Features:**
    - Uses Mistral model via Groq API
    - Web search via DuckDuckGo (free, no keys)
    - Fetches files from GAIA API
    - Automatic answer submission
    """)
    
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit", size="lg", variant="primary")
    
    # Receive the two values returned by run_and_submit_all, in this order:
    # the status message and the per-task results table.
    status_output = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)
    
    # No `inputs` are listed: the callback's `profile` argument is presumably
    # injected by Gradio from the login session via its `gr.OAuthProfile`
    # annotation — confirm against the Gradio OAuth documentation.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)