Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,14 @@
|
|
| 1 |
import os
|
| 2 |
-
import io
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|
| 5 |
import pandas as pd
|
| 6 |
-
from datasets import load_dataset
|
| 7 |
-
from huggingface_hub import snapshot_download
|
| 8 |
from agent import GaiaAgent
|
| 9 |
|
| 10 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 11 |
|
| 12 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 13 |
"""
|
| 14 |
-
|
| 15 |
"""
|
| 16 |
|
| 17 |
# Check login
|
|
@@ -22,67 +19,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 22 |
space_id = os.getenv("SPACE_ID")
|
| 23 |
|
| 24 |
print(f"\n[run_and_submit_all] starting for user: {username}")
|
| 25 |
-
print(f"[run_and_submit_all] space_id: {space_id}")
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
print("[run_and_submit_all]
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
)
|
| 34 |
-
print(f"[run_and_submit_all] dataset downloaded to: {data_dir}")
|
| 35 |
except Exception as e:
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
# Load dataset
|
| 40 |
-
print("[run_and_submit_all] loading dataset...")
|
| 41 |
-
try:
|
| 42 |
-
dataset = load_dataset(data_dir, "2023_level1", split="validation", cache_dir=data_dir)
|
| 43 |
-
print(f"[run_and_submit_all] loaded {len(dataset)} questions")
|
| 44 |
-
except Exception as e:
|
| 45 |
-
print(f"[run_and_submit_all] error loading dataset: {e}")
|
| 46 |
-
return f"Error loading dataset: {e}", None
|
| 47 |
-
|
| 48 |
-
# Map task IDs to file paths
|
| 49 |
-
id_to_path = {}
|
| 50 |
-
for ex in dataset:
|
| 51 |
-
if ex.get("file_path") and ex.get("file_name"):
|
| 52 |
-
full_path = os.path.join(data_dir, ex["file_path"])
|
| 53 |
-
if os.path.exists(full_path):
|
| 54 |
-
id_to_path[ex["task_id"]] = full_path
|
| 55 |
-
|
| 56 |
-
print(f"[run_and_submit_all] mapped {len(id_to_path)} task IDs to files")
|
| 57 |
-
|
| 58 |
-
# Target task IDs (the 20 questions)
|
| 59 |
-
target_task_ids = [
|
| 60 |
-
"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
| 61 |
-
"a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
| 62 |
-
"2d83110e-a098-4ebb-9987-066c06fa42d0",
|
| 63 |
-
"cca530fc-4052-43b2-b130-b30968d8aa44",
|
| 64 |
-
"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
|
| 65 |
-
"6f37996b-2ac7-44b0-8e68-6d28256631b4",
|
| 66 |
-
"9d191bce-651d-4746-be2d-7ef8ecadb9c2",
|
| 67 |
-
"cabe07ed-9eca-40ea-8ead-410ef5e83f91",
|
| 68 |
-
"3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
|
| 69 |
-
"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
|
| 70 |
-
"305ac316-eef6-4446-960a-92d80d542f82",
|
| 71 |
-
"f918266a-b3e0-4914-865d-4faa564f1aef",
|
| 72 |
-
"3f57289b-8c60-48be-bd80-01f8099ca449",
|
| 73 |
-
"1f975693-876d-457b-a649-393859e79bf3",
|
| 74 |
-
"840bfca7-4f7b-481a-8794-c560c340185d",
|
| 75 |
-
"bda648d7-d618-4883-88f4-3466eabd860e",
|
| 76 |
-
"cf106601-ab4f-4af9-b045-5295fe67b37d",
|
| 77 |
-
"a0c07678-e491-4bbc-8f0b-07405144218f",
|
| 78 |
-
"7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
| 79 |
-
"5a0c1adf-205e-4841-a666-7c3ef95def9d"
|
| 80 |
-
]
|
| 81 |
-
|
| 82 |
-
# Filter to target questions
|
| 83 |
-
print(f"[run_and_submit_all] filtering to {len(target_task_ids)} target questions...")
|
| 84 |
-
subset = dataset.filter(lambda ex: ex["task_id"] in target_task_ids)
|
| 85 |
-
subset = subset.to_list()
|
| 86 |
|
| 87 |
# Initialize agent
|
| 88 |
print("[run_and_submit_all] initializing agent...")
|
|
@@ -92,35 +40,38 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 92 |
results_log = []
|
| 93 |
answers_payload = []
|
| 94 |
|
| 95 |
-
for i, item in enumerate(
|
| 96 |
task_id = item.get("task_id")
|
| 97 |
question = item.get("question")
|
| 98 |
-
file_name = item.get("file_name")
|
| 99 |
-
file_path = id_to_path.get(task_id)
|
| 100 |
|
| 101 |
-
print(f"\n[run_and_submit_all] [{i+1}/{len(
|
| 102 |
print(f" question: {question[:80]}...")
|
| 103 |
-
print(f" file: {file_name}")
|
| 104 |
|
| 105 |
file_content = ""
|
| 106 |
|
| 107 |
-
#
|
| 108 |
-
if
|
| 109 |
try:
|
| 110 |
-
print(f"
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
else:
|
| 116 |
-
|
| 117 |
-
with open(file_path, "rb") as f:
|
| 118 |
-
file_bytes = f.read()
|
| 119 |
-
# Just note that we have a binary file
|
| 120 |
-
file_content = f"[Binary file: {file_name}, {len(file_bytes)} bytes]"
|
| 121 |
-
print(f" loaded binary file ({len(file_bytes)} bytes)")
|
| 122 |
except Exception as e:
|
| 123 |
-
print(f" error
|
| 124 |
|
| 125 |
# Run agent
|
| 126 |
try:
|
|
@@ -145,7 +96,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 145 |
}
|
| 146 |
|
| 147 |
try:
|
| 148 |
-
response = requests.post(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
response.raise_for_status()
|
| 150 |
result = response.json()
|
| 151 |
|
|
@@ -160,27 +115,26 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 160 |
return status_msg, pd.DataFrame(results_log)
|
| 161 |
|
| 162 |
except Exception as e:
|
| 163 |
-
error_msg = f"❌ Submission failed: {e}"
|
| 164 |
print(error_msg)
|
| 165 |
return error_msg, pd.DataFrame(results_log)
|
| 166 |
|
| 167 |
|
| 168 |
# Gradio UI
|
| 169 |
with gr.Blocks() as demo:
|
| 170 |
-
gr.Markdown("# GAIA Agent — Mistral
|
| 171 |
gr.Markdown("""
|
| 172 |
**How it works:**
|
| 173 |
1. Click "Login with Hugging Face"
|
| 174 |
2. Click "Run Evaluation"
|
| 175 |
-
3.
|
| 176 |
-
4. See your score!
|
| 177 |
-
|
| 178 |
-
**
|
| 179 |
-
-
|
| 180 |
-
-
|
| 181 |
-
-
|
| 182 |
-
-
|
| 183 |
-
- Submits all answers for scoring
|
| 184 |
""")
|
| 185 |
|
| 186 |
gr.LoginButton()
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
import pandas as pd
|
|
|
|
|
|
|
| 5 |
from agent import GaiaAgent
|
| 6 |
|
| 7 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 8 |
|
| 9 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 10 |
"""
|
| 11 |
+
Fetch questions from GAIA API, run agent, submit answers
|
| 12 |
"""
|
| 13 |
|
| 14 |
# Check login
|
|
|
|
| 19 |
space_id = os.getenv("SPACE_ID")
|
| 20 |
|
| 21 |
print(f"\n[run_and_submit_all] starting for user: {username}")
|
|
|
|
| 22 |
|
| 23 |
+
# Fetch questions from API
|
| 24 |
+
print("[run_and_submit_all] fetching questions from API...")
|
| 25 |
try:
|
| 26 |
+
questions_resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
|
| 27 |
+
questions_resp.raise_for_status()
|
| 28 |
+
questions = questions_resp.json()
|
| 29 |
+
print(f"[run_and_submit_all] ✓ fetched {len(questions)} questions")
|
|
|
|
| 30 |
except Exception as e:
|
| 31 |
+
error_msg = f"Error fetching questions: {str(e)[:200]}"
|
| 32 |
+
print(f"[run_and_submit_all] ✗ {error_msg}")
|
| 33 |
+
return error_msg, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# Initialize agent
|
| 36 |
print("[run_and_submit_all] initializing agent...")
|
|
|
|
| 40 |
results_log = []
|
| 41 |
answers_payload = []
|
| 42 |
|
| 43 |
+
for i, item in enumerate(questions):
|
| 44 |
task_id = item.get("task_id")
|
| 45 |
question = item.get("question")
|
| 46 |
+
file_name = item.get("file_name", "")
|
|
|
|
| 47 |
|
| 48 |
+
print(f"\n[run_and_submit_all] [{i+1}/{len(questions)}] task_id={task_id}")
|
| 49 |
print(f" question: {question[:80]}...")
|
| 50 |
+
print(f" file: {file_name if file_name else '(none)'}")
|
| 51 |
|
| 52 |
file_content = ""
|
| 53 |
|
| 54 |
+
# Try to fetch file if it exists
|
| 55 |
+
if file_name:
|
| 56 |
try:
|
| 57 |
+
print(f" fetching file: {file_name}...")
|
| 58 |
+
file_resp = requests.get(
|
| 59 |
+
f"{DEFAULT_API_URL}/files/{task_id}",
|
| 60 |
+
timeout=30
|
| 61 |
+
)
|
| 62 |
+
if file_resp.status_code == 200:
|
| 63 |
+
# Try to decode as text first
|
| 64 |
+
try:
|
| 65 |
+
file_content = file_resp.text[:5000]
|
| 66 |
+
print(f" loaded {len(file_content)} chars from file")
|
| 67 |
+
except:
|
| 68 |
+
# If binary, note it
|
| 69 |
+
file_content = f"[Binary file: {file_name}, {len(file_resp.content)} bytes]"
|
| 70 |
+
print(f" loaded binary file")
|
| 71 |
else:
|
| 72 |
+
print(f" file fetch returned {file_resp.status_code} (skipping)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
except Exception as e:
|
| 74 |
+
print(f" error fetching file: {e}")
|
| 75 |
|
| 76 |
# Run agent
|
| 77 |
try:
|
|
|
|
| 96 |
}
|
| 97 |
|
| 98 |
try:
|
| 99 |
+
response = requests.post(
|
| 100 |
+
f"{DEFAULT_API_URL}/submit",
|
| 101 |
+
json=submission_data,
|
| 102 |
+
timeout=60
|
| 103 |
+
)
|
| 104 |
response.raise_for_status()
|
| 105 |
result = response.json()
|
| 106 |
|
|
|
|
| 115 |
return status_msg, pd.DataFrame(results_log)
|
| 116 |
|
| 117 |
except Exception as e:
|
| 118 |
+
error_msg = f"❌ Submission failed: {str(e)[:200]}"
|
| 119 |
print(error_msg)
|
| 120 |
return error_msg, pd.DataFrame(results_log)
|
| 121 |
|
| 122 |
|
| 123 |
# Gradio UI
|
| 124 |
with gr.Blocks() as demo:
|
| 125 |
+
gr.Markdown("# GAIA Agent — Mistral")
|
| 126 |
gr.Markdown("""
|
| 127 |
**How it works:**
|
| 128 |
1. Click "Login with Hugging Face"
|
| 129 |
2. Click "Run Evaluation"
|
| 130 |
+
3. Agent processes all 20 questions
|
| 131 |
+
4. See your score instantly!
|
| 132 |
+
|
| 133 |
+
**Features:**
|
| 134 |
+
- Uses Mistral model via Groq API
|
| 135 |
+
- Web search via DuckDuckGo (free, no keys)
|
| 136 |
+
- Fetches files from GAIA API
|
| 137 |
+
- Automatic answer submission
|
|
|
|
| 138 |
""")
|
| 139 |
|
| 140 |
gr.LoginButton()
|