"""Gradio UI that runs a GAIAAgent over the course questions and submits the answers."""

import os
import time
import traceback
from concurrent.futures import ThreadPoolExecutor

import gradio as gr
import pandas as pd
import requests

from agent import GAIAAgent

API_BASE = "https://agents-course-unit4-scoring.hf.space"
SPACE_ID = os.environ.get("SPACE_ID", "")

# Maximum time, in seconds, to wait for the agent to answer one question.
AGENT_TIMEOUT_S = 90
# Answer recorded when the agent fails, times out, or returns nothing.
FALLBACK_ANSWER = "I don't know"


def fetch_questions():
    """Return the decoded JSON body of ``GET {API_BASE}/questions``.

    Raises:
        requests.RequestException: on connection errors, timeouts (30 s) or
            a non-2xx status code.
    """
    r = requests.get(f"{API_BASE}/questions", timeout=30)
    r.raise_for_status()
    return r.json()


def _resolve_tokens(hf_token):
    """Return ``(ui_token, env_token)``; a missing value becomes ``""``."""
    ui_token = hf_token.strip() if hf_token else ""
    env_token = os.environ.get("HF_TOKEN", "")
    return ui_token, env_token


def _ask_with_timeout(agent, prompt, task_id, timeout=AGENT_TIMEOUT_S):
    """Call ``agent(prompt, task_id)`` in a worker thread, waiting at most *timeout* s.

    The executor is deliberately NOT used as a context manager: leaving a
    ``with ThreadPoolExecutor(...)`` block calls ``shutdown(wait=True)``,
    which blocks until the worker finishes and therefore defeats the timeout.

    A timed-out call cannot be killed; its thread keeps running in the
    background until the agent returns, and its result is discarded.

    Raises:
        TimeoutError: if the agent does not finish within *timeout* seconds.
        Exception: whatever the agent itself raised.
    """
    executor = ThreadPoolExecutor(max_workers=1)
    try:
        return executor.submit(agent, prompt, task_id).result(timeout=timeout)
    finally:
        executor.shutdown(wait=False, cancel_futures=True)


def run_and_submit_all(hf_token: str, oauth_token: gr.OAuthToken | None = None,
                       profile: gr.OAuthProfile | None = None):
    """Answer every fetched question with the agent and submit the results.

    Args:
        hf_token: Token typed into the UI; takes precedence over the
            ``HF_TOKEN`` environment variable. May be empty or ``None``.
        oauth_token: Unused by this function; kept for interface compatibility.
        profile: Logged-in user's profile; ``None`` means not logged in.

    Returns:
        ``(status_markdown, table)`` where ``table`` is a DataFrame with one
        row per question, or ``None`` when nothing was run.
    """
    if profile is None:
        return "❌ Please log in with HuggingFace first.", None

    username = profile.username
    space_id = SPACE_ID or f"{username}/Final_Assignment_Template"
    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"

    ui_token, env_token = _resolve_tokens(hf_token)
    effective_token = ui_token or env_token
    # Report only whether a token is present; never echo any part of the
    # secret into the logs.
    print(f"DEBUG: env HF_TOKEN={'SET' if env_token else 'NOT SET'}")
    print(f"DEBUG: UI token={'SET' if ui_token else 'NOT SET'}")
    token_source = "UI" if ui_token else ("ENV" if env_token else "NONE")
    print(f"DEBUG: Using={token_source}")

    try:
        questions = fetch_questions()
    except Exception as e:
        return f"❌ Failed to fetch questions: {e}", None
    if not questions:
        return "❌ Failed to fetch questions: the server returned no questions", None

    agent = GAIAAgent(hf_token=effective_token or None)
    total_questions = len(questions)
    answers, rows = [], []

    for i, item in enumerate(questions):
        # ``or ""`` also covers keys that are present but null in the JSON.
        task_id = item.get("task_id") or ""
        question = item.get("question") or ""
        file_name = item.get("file_name") or ""

        q = question
        if file_name:
            q = f"This question has an attached file: {file_name}\nTask ID: {task_id}\n\n{question}"

        print(f"[{i+1}/{total_questions}] {task_id[:8]}...")
        start = time.time()
        try:
            answer = _ask_with_timeout(agent, q, task_id)
        except Exception as e:
            # Best effort: one failing question must not abort the whole run.
            answer = FALLBACK_ANSWER
            print(f"Error/timeout on {task_id}: {type(e).__name__}")
        elapsed = time.time() - start

        if answer is None:
            answer = FALLBACK_ANSWER
        # Coerce to str so a numeric or other non-string answer cannot crash
        # the run, then drop leading currency symbols and outer whitespace.
        answer = str(answer).lstrip("$€£").strip()

        answers.append({"task_id": task_id, "submitted_answer": answer})
        rows.append({
            "#": i + 1,
            "Task ID": task_id[:8],
            "Question": question[:80],
            "Answer": answer,
            "Time(s)": f"{elapsed:.1f}",
        })
        print(f" → {answer} ({elapsed:.1f}s)")
        time.sleep(0.3)

    try:
        resp = requests.post(f"{API_BASE}/submit", json={
            "username": username,
            "agent_code": agent_code_url,
            "answers": answers,
        }, timeout=60)
        resp.raise_for_status()
        result = resp.json()
    except Exception as e:
        # Keep the per-question table so the answers are not lost.
        return f"❌ Submission failed: {e}", pd.DataFrame(rows)

    score = result.get("score", "?")
    correct = result.get("correct_count", "?")
    total = result.get("total_questions", total_questions)
    summary = (f"**Username:** {username}\n**Score:** {score}% | **Correct:** {correct}/{total}\n"
               f"**Message:** {result.get('message','')}\n**Space:** [{space_id}]({agent_code_url})")
    return summary, pd.DataFrame(rows)


def test_single(question: str, task_id: str, hf_token: str) -> str:
    """Run the agent on one question and return its answer.

    Args:
        question: Question text; blank input yields a prompt to enter one.
        task_id: Optional task id; an empty value is passed to the agent as ``None``.
        hf_token: Token typed into the UI; falls back to ``HF_TOKEN``.
    """
    if not question or not question.strip():
        return "Enter a question."
    ui_token, env_token = _resolve_tokens(hf_token)
    effective_token = ui_token or env_token
    agent = GAIAAgent(hf_token=effective_token or None)
    return agent(question, task_id=task_id or None)


with gr.Blocks(title="GAIA Agent") as demo:
    gr.Markdown("# 🤖 GAIA Benchmark Agent\n**HuggingFace Agents Course — Unit 4**")
    hf_login = gr.LoginButton()
    hf_token_input = gr.Textbox(label="HF Token (paste here directly)", type="password", placeholder="hf_...")
    with gr.Tabs():
        with gr.Tab("🚀 Run & Submit All"):
            run_btn = gr.Button("▶ Run & Submit All", variant="primary")
            status = gr.Markdown()
            table = gr.Dataframe(wrap=True)
            run_btn.click(fn=run_and_submit_all, inputs=[hf_token_input], outputs=[status, table])
        with gr.Tab("🔬 Test Single Question"):
            q_input = gr.Textbox(label="Question", lines=3)
            tid_input = gr.Textbox(label="Task ID (optional)")
            test_btn = gr.Button("Ask", variant="primary")
            test_out = gr.Markdown()
            test_btn.click(fn=test_single, inputs=[q_input, tid_input, hf_token_input], outputs=test_out)

if __name__ == "__main__":
    # NOTE(review): passing `theme` to launch() presumably requires a recent
    # Gradio release — confirm against the version pinned for this Space.
    demo.launch(debug=True, theme=gr.themes.Soft())