Spaces:
Sleeping
Sleeping
File size: 10,580 Bytes
93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc e95a92e d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc d1dcd56 93b72dc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | import os
import gradio as gr
import requests
import pandas as pd
import tempfile
import json
import logging
from typing import Optional
from dotenv import load_dotenv
load_dotenv()
from agent_enhanced import GAIAAgent, is_ollama_available, is_production
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def fetch_questions(api_url: str = DEFAULT_API_URL) -> list:
    """Fetch all questions from the GAIA API, echoing each one to stdout.

    Retries up to three times; returns an empty list if every attempt fails.
    """
    for attempt in (1, 2, 3):
        try:
            resp = requests.get(f"{api_url}/questions", timeout=30)
            resp.raise_for_status()
            questions = resp.json()
            divider = "=" * 80
            # Dump every question with its task ID for easy inspection.
            print("\n" + divider)
            print("ALL QUESTIONS WITH TASK IDs:")
            print(divider)
            for idx, item in enumerate(questions, 1):
                tid = item.get("task_id", "N/A")
                text = item.get("question", "N/A")
                attachment = item.get("file_name", "")
                print(f"\n[{idx}] Task ID: {tid}")
                suffix = '...' if len(text) > 200 else ''
                print(f" Question: {text[:200]}{suffix}")
                if attachment:
                    print(f" File: {attachment}")
            print("\n" + divider)
            print(f"Total questions: {len(questions)}")
            print(divider + "\n")
            return questions
        except Exception as exc:
            logger.warning(f"Attempt {attempt} failed: {exc}")
    return []
def fetch_random_question(api_url: str = DEFAULT_API_URL) -> dict:
    """Fetch a single random question; empty dict after three failed tries."""
    for attempt in (1, 2, 3):
        try:
            resp = requests.get(f"{api_url}/random-question", timeout=30)
            resp.raise_for_status()
        except Exception as exc:
            logger.warning(f"Attempt {attempt} failed: {exc}")
        else:
            return resp.json()
    return {}
def fetch_file(task_id: str, api_url: str = DEFAULT_API_URL) -> Optional[str]:
    """Download the attachment for a task into a fresh temp directory.

    Returns the local file path, or None when the task has no attachment
    (404), the download fails, or the server returns an unexpected status.
    """
    try:
        response = requests.get(f"{api_url}/files/{task_id}", timeout=30)
        if response.status_code == 200:
            content_disposition = response.headers.get('content-disposition', '')
            filename = f"task_{task_id}_file"
            if 'filename=' in content_disposition:
                # Take the value after "filename=", drop any trailing
                # ";"-separated parameters, and strip surrounding quotes.
                raw = content_disposition.split('filename=')[1].split(';')[0].strip().strip('"')
                # basename guards against path traversal from a hostile header
                # (e.g. filename="../../etc/passwd").
                raw = os.path.basename(raw)
                if raw:
                    filename = raw
            temp_dir = tempfile.mkdtemp()
            file_path = os.path.join(temp_dir, filename)
            with open(file_path, 'wb') as f:
                f.write(response.content)
            logger.info(f"Downloaded: {file_path}")
            return file_path
        elif response.status_code == 404:
            # 404 means the task simply has no attached file.
            return None
        else:
            # Previously these fell through silently; log for diagnosis.
            logger.warning(f"Unexpected status {response.status_code} fetching file for task {task_id}")
    except Exception as e:
        logger.error(f"File fetch failed: {e}")
    return None
def submit_answers(username: str, agent_code: str, answers: list, api_url: str = DEFAULT_API_URL) -> dict:
    """Submit answers to API."""
    body = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    resp = requests.post(f"{api_url}/submit", json=body, timeout=60)
    resp.raise_for_status()
    return resp.json()
def get_env_status() -> str:
    """Return a markdown banner describing which model backend is active."""
    if is_production():
        status = "βοΈ **Production Mode** (HuggingFace Spaces) - Using OpenAI GPT-4o"
    elif is_ollama_available():
        status = "π **Local Mode** - Using Ollama"
    elif os.environ.get("OPENAI_API_KEY"):
        status = "βοΈ **Local + OpenAI** - Using OpenAI GPT-4o"
    else:
        status = "β οΈ **No Backend** - Set OPENAI_API_KEY or start Ollama"
    return status
def run_agent_on_questions(progress=gr.Progress()):
    """Run the agent on every GAIA question and collect submittable answers.

    Returns a pair:
      * results -- pandas DataFrame of per-question outcomes, or an error
        string when the run could not start.
      * answers -- list of {"task_id", "submitted_answer"} dicts for the
        submission endpoint, or None on failure.
    """
    try:
        progress(0, desc="Initializing agent...")
        agent = GAIAAgent()
        progress(0.05, desc="Fetching questions...")
        questions = fetch_questions()
        if not questions:
            return "Error: Failed to fetch questions.", None
        total = len(questions)
        results = []
        answers_for_submission = []
        for i, q in enumerate(questions):
            progress((i + 1) / total, desc=f"Question {i+1}/{total}...")
            task_id = q.get("task_id", "")
            question_text = q.get("question", "")
            file_path = None
            if q.get("file_name"):
                file_path = fetch_file(task_id)
            try:
                answer = agent.run(question_text, task_id, file_path)
            except Exception as e:
                # Keep going: one failed question should not abort the run.
                logger.error(f"Error on question {i+1}: {e}")
                answer = f"Error: {str(e)}"
            results.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": answer,
                # Success only when the agent produced a real, non-error answer.
                "Status": "β" if answer and not answer.startswith("Error:") and answer != "Unable to determine answer" else "β"
            })
            answers_for_submission.append({
                "task_id": task_id,
                "submitted_answer": answer
            })
            # Best-effort cleanup of the per-task temp file and its directory.
            if file_path and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    os.rmdir(os.path.dirname(file_path))
                except OSError:
                    # A stray temp file is harmless; never abort the run for it.
                    pass
        df = pd.DataFrame(results)
        progress(1.0, desc="Complete!")
        return df, answers_for_submission
    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", None
def test_single_question():
    """Fetch one random GAIA question, run the agent on it, and report.

    Returns (question_text, answer, task_id, status_message). On failure the
    first element carries the error string and the rest are empty strings.
    """
    try:
        agent = GAIAAgent()
        question_data = fetch_random_question()
        if not question_data:
            return "Error: Failed to fetch question.", "", "", ""
        task_id = question_data.get("task_id", "")
        question_text = question_data.get("question", "")
        file_path = None
        if question_data.get("file_name"):
            file_path = fetch_file(task_id)
        answer = agent.run(question_text, task_id, file_path)
        # Best-effort cleanup of the downloaded temp file and its directory.
        if file_path and os.path.exists(file_path):
            try:
                os.remove(file_path)
                os.rmdir(os.path.dirname(file_path))
            except OSError:
                # A leftover temp file is harmless; don't mask the answer.
                pass
        status = "β Valid" if answer and not answer.startswith("Error") else "β οΈ Check answer"
        return question_text, answer, task_id, status
    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", "", "", ""
def submit_to_leaderboard(username: str, space_url: str, answers_json: str):
    """Submit recorded answers to the course leaderboard.

    Validates the inputs, normalizes the Space URL, posts the answers, and
    returns a markdown string describing the outcome (or the error).
    """
    if not username or not space_url or not answers_json:
        return "Please fill in all fields and run the agent first."
    try:
        answers = json.loads(answers_json) if isinstance(answers_json, str) else answers_json
        if not isinstance(answers, list) or len(answers) == 0:
            return "Error: Run the benchmark first."
        # The scoring API expects a link to the Space's code tree.
        if not space_url.endswith("/tree/main"):
            space_url = space_url.rstrip("/") + "/tree/main"
        result = submit_answers(username, space_url, answers)
        # Was a bare debug print(); route through the module logger instead.
        logger.info(f"Submission response: {result}")
        score = result.get("score", 0)
        correct = result.get("correct_count", 0)
        total = result.get("total_attempted", 0)
        # NOTE(review): the `{score:.1%}` format and the `> 0.3` threshold both
        # assume the API reports `score` as a fraction in [0, 1] -- confirm
        # against the scoring service's response schema.
        cert_msg = "π **Congratulations!** Score above 30% - Certificate earned!" if score > 0.3 else "β Need >30% for certificate."
        return f"""
## Submission Results
**Score:** {score:.1%}
**Correct:** {correct}/{total}
{cert_msg}
[View Leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard)
"""
    except Exception as e:
        logger.error(f"Submission error: {e}")
        return f"Error: {str(e)}"
# ============ GRADIO APP ============
# Layout: a backend-status banner plus three tabs (single-question test,
# full benchmark run, leaderboard submission). Component creation order
# inside each `with` context defines the on-screen layout.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# π€ GAIA Benchmark Agent
**Tools:** π Web Search | π Wikipedia | π Python | π Files | π’ Calculator | π Webpages | ποΈ Vision (OpenAI)
""")
    # Banner reporting which model backend (production / Ollama / OpenAI) is active.
    env_status = gr.Markdown(get_env_status())
    with gr.Tabs():
        # Tab 1: fetch one random question and run the agent on it.
        with gr.TabItem("π§ͺ Test Single"):
            test_btn = gr.Button("Fetch & Solve Random Question", variant="primary")
            test_q = gr.Textbox(label="Question", lines=4, interactive=False)
            test_a = gr.Textbox(label="Answer", lines=2, interactive=False)
            test_id = gr.Textbox(label="Task ID", interactive=False)
            test_status = gr.Textbox(label="Status", interactive=False)
            test_btn.click(test_single_question, outputs=[test_q, test_a, test_id, test_status])
        # Tab 2: run the agent over every question; the answers list is held
        # in gr.State so the Submit tab can reuse it.
        with gr.TabItem("π Full Benchmark"):
            run_btn = gr.Button("Run on All Questions", variant="primary")
            results_df = gr.Dataframe(label="Results")
            answers_state = gr.State()
            run_btn.click(run_agent_on_questions, outputs=[results_df, answers_state])
        # Tab 3: send the collected answers to the course leaderboard.
        with gr.TabItem("π€ Submit"):
            gr.Markdown("### Submit to Leaderboard")
            with gr.Row():
                username_in = gr.Textbox(label="HF Username", placeholder="your-username")
                space_url_in = gr.Textbox(label="Space URL", placeholder="https://huggingface.co/spaces/you/space")
            answers_in = gr.Textbox(label="Answers JSON (auto-filled)", lines=8)
            submit_btn = gr.Button("Submit", variant="primary")
            submit_result = gr.Markdown()

            def format_answers(a):
                # Pretty-print the benchmark answers for the JSON textbox.
                return json.dumps(a, indent=2) if a else ""
            # Auto-fill the JSON textbox whenever a benchmark run finishes.
            answers_state.change(format_answers, inputs=[answers_state], outputs=[answers_in])
            submit_btn.click(submit_to_leaderboard, inputs=[username_in, space_url_in, answers_in], outputs=[submit_result])
    gr.Markdown("""
---
**Setup:**
- Local: `ollama serve` + `ollama pull qwen2.5:32b`
- Production: Set `OPENAI_API_KEY` in `.env` or HF Secrets
""")
if __name__ == "__main__":
    # 0.0.0.0 exposes the app inside the HF Spaces container; 7860 is the
    # port Spaces routes to.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|