import os
import json
import logging
import tempfile
from typing import Optional

import gradio as gr
import pandas as pd
import requests
from dotenv import load_dotenv

# Load .env before importing the agent so OPENAI_API_KEY etc. are visible to it.
load_dotenv()

from agent_enhanced import GAIAAgent, is_ollama_available, is_production

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def fetch_questions(api_url: str = DEFAULT_API_URL) -> list:
    """Fetch all questions from the GAIA API, retrying up to 3 times.

    Also prints every question with its task ID for console inspection.
    Returns an empty list if every attempt fails.
    """
    for attempt in range(3):
        try:
            response = requests.get(f"{api_url}/questions", timeout=30)
            response.raise_for_status()
            questions = response.json()

            # Print all questions with their task IDs
            print("\n" + "=" * 80)
            print("ALL QUESTIONS WITH TASK IDs:")
            print("=" * 80)
            for i, q in enumerate(questions, 1):
                task_id = q.get("task_id", "N/A")
                question_text = q.get("question", "N/A")
                file_name = q.get("file_name", "")
                print(f"\n[{i}] Task ID: {task_id}")
                print(f"    Question: {question_text[:200]}{'...' if len(question_text) > 200 else ''}")
                if file_name:
                    print(f"    File: {file_name}")
            print("\n" + "=" * 80)
            print(f"Total questions: {len(questions)}")
            print("=" * 80 + "\n")
            return questions
        except Exception as e:
            logger.warning(f"Attempt {attempt + 1} failed: {e}")
    return []


def fetch_random_question(api_url: str = DEFAULT_API_URL) -> dict:
    """Fetch a single random question, retrying up to 3 times.

    Returns an empty dict if every attempt fails.
    """
    for attempt in range(3):
        try:
            response = requests.get(f"{api_url}/random-question", timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.warning(f"Attempt {attempt + 1} failed: {e}")
    return {}


def fetch_file(task_id: str, api_url: str = DEFAULT_API_URL) -> Optional[str]:
    """Download the attachment for a task into a fresh temp directory.

    Returns the local file path, or None when the task has no attachment
    (HTTP 404) or the download fails for any reason.
    """
    try:
        response = requests.get(f"{api_url}/files/{task_id}", timeout=30)
        if response.status_code == 200:
            content_disposition = response.headers.get('content-disposition', '')
            filename = f"task_{task_id}_file"
            if 'filename=' in content_disposition:
                # BUG FIX: take only the filename token — drop trailing
                # header parameters (e.g. "; charset=...") and quotes, and
                # strip any directory components so a hostile header cannot
                # escape the temp directory.
                raw = content_disposition.split('filename=')[1].split(';')[0]
                filename = os.path.basename(raw.strip().strip('"')) or filename
            temp_dir = tempfile.mkdtemp()
            file_path = os.path.join(temp_dir, filename)
            with open(file_path, 'wb') as f:
                f.write(response.content)
            logger.info(f"Downloaded: {file_path}")
            return file_path
        elif response.status_code == 404:
            # 404 simply means "no file attached to this task".
            return None
        else:
            # BUG FIX: previously fell through silently on other statuses.
            logger.warning(f"Unexpected status {response.status_code} fetching file for task {task_id}")
            return None
    except Exception as e:
        logger.error(f"File fetch failed: {e}")
    return None


def _cleanup_temp_file(file_path: Optional[str]) -> None:
    """Best-effort removal of a downloaded file and its temp directory.

    Cleanup must never abort a benchmark run, so OSErrors are swallowed
    (but only OSError — not every exception, as the old bare except did).
    """
    if file_path and os.path.exists(file_path):
        try:
            os.remove(file_path)
            os.rmdir(os.path.dirname(file_path))
        except OSError:
            pass


def submit_answers(username: str, agent_code: str, answers: list,
                   api_url: str = DEFAULT_API_URL) -> dict:
    """Submit answers to the scoring API and return its JSON response.

    Raises requests.HTTPError on a non-2xx response.
    """
    payload = {"username": username, "agent_code": agent_code, "answers": answers}
    response = requests.post(f"{api_url}/submit", json=payload, timeout=60)
    response.raise_for_status()
    return response.json()


def get_env_status() -> str:
    """Return a markdown string describing which LLM backend is active."""
    if is_production():
        return "\u2601\ufe0f **Production Mode** (HuggingFace Spaces) - Using OpenAI GPT-4o"
    elif is_ollama_available():
        return "\U0001f3e0 **Local Mode** - Using Ollama"
    elif os.environ.get("OPENAI_API_KEY"):
        return "\u2601\ufe0f **Local + OpenAI** - Using OpenAI GPT-4o"
    else:
        return "\u26a0\ufe0f **No Backend** - Set OPENAI_API_KEY or start Ollama"


def run_agent_on_questions(progress=gr.Progress()):
    """Run the agent on every question and collect answers for submission.

    Returns (results, answers) where results is a DataFrame for display and
    answers is the list of {task_id, submitted_answer} dicts the scoring
    API expects. On failure returns an error DataFrame and None.
    """
    try:
        progress(0, desc="Initializing agent...")
        agent = GAIAAgent()

        progress(0.05, desc="Fetching questions...")
        questions = fetch_questions()
        if not questions:
            # BUG FIX: a bare string cannot be rendered by gr.Dataframe;
            # wrap the error so the UI still shows something useful.
            return pd.DataFrame([{"Error": "Failed to fetch questions."}]), None

        total = len(questions)
        results = []
        answers_for_submission = []

        for i, q in enumerate(questions):
            progress((i + 1) / total, desc=f"Question {i+1}/{total}...")
            task_id = q.get("task_id", "")
            question_text = q.get("question", "")

            file_path = None
            if q.get("file_name"):
                file_path = fetch_file(task_id)

            try:
                answer = agent.run(question_text, task_id, file_path)
            except Exception as e:
                logger.error(f"Error on question {i+1}: {e}")
                answer = f"Error: {str(e)}"

            results.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": answer,
                "Status": "\u2713" if answer and not answer.startswith("Error:")
                          and answer != "Unable to determine answer" else "\u2717",
            })
            answers_for_submission.append({
                "task_id": task_id,
                "submitted_answer": answer,
            })

            _cleanup_temp_file(file_path)

        df = pd.DataFrame(results)
        progress(1.0, desc="Complete!")
        return df, answers_for_submission

    except Exception as e:
        logger.error(f"Error: {e}")
        return pd.DataFrame([{"Error": str(e)}]), None


def test_single_question():
    """Fetch one random question, solve it, and return UI fields.

    Returns (question_text, answer, task_id, status); on failure the first
    field carries the error message and the rest are empty strings.
    """
    try:
        agent = GAIAAgent()
        question_data = fetch_random_question()
        if not question_data:
            return "Error: Failed to fetch question.", "", "", ""

        task_id = question_data.get("task_id", "")
        question_text = question_data.get("question", "")

        file_path = None
        if question_data.get("file_name"):
            file_path = fetch_file(task_id)

        answer = agent.run(question_text, task_id, file_path)

        _cleanup_temp_file(file_path)

        # Use the same "Error:" prefix convention as the batch runner.
        status = "\u2713 Valid" if answer and not answer.startswith("Error:") else "\u26a0\ufe0f Check answer"
        return question_text, answer, task_id, status

    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", "", "", ""


def submit_to_leaderboard(username: str, space_url: str, answers_json: str):
    """Submit collected answers to the course leaderboard.

    Accepts the answers either as a JSON string (from the textbox) or as an
    already-parsed list. Returns a markdown summary of the result.
    """
    if not username or not space_url or not answers_json:
        return "Please fill in all fields and run the agent first."
    try:
        answers = json.loads(answers_json) if isinstance(answers_json, str) else answers_json
        if not isinstance(answers, list) or len(answers) == 0:
            return "Error: Run the benchmark first."

        if not space_url.endswith("/tree/main"):
            space_url = space_url.rstrip("/") + "/tree/main"

        result = submit_answers(username, space_url, answers)
        print(result)
        score = result.get("score", 0)
        correct = result.get("correct_count", 0)
        total = result.get("total_attempted", 0)

        # BUG FIX: the API reports the score as a percentage (e.g. 65.0),
        # but the old code formatted it with "{:.1%}" (showing 6500.0%) and
        # gated the certificate on score > 0.3 (nearly always true).
        # Normalize so both a fraction (0-1) and a percentage behave right.
        score_pct = score * 100 if 0 <= score <= 1 else score
        cert_msg = ("\U0001f3c6 **Congratulations!** Score above 30% - Certificate earned!"
                    if score_pct > 30 else "\u274c Need >30% for certificate.")

        return f"""
## Submission Results

**Score:** {score_pct:.1f}%
**Correct:** {correct}/{total}

{cert_msg}

[View Leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard)
"""
    except Exception as e:
        logger.error(f"Submission error: {e}")
        return f"Error: {str(e)}"


# ============ GRADIO APP ============

with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # \U0001f916 GAIA Benchmark Agent

    **Tools:** \U0001f50d Web Search | \U0001f4da Wikipedia | \U0001f40d Python | \U0001f4c4 Files | \U0001f522 Calculator | \U0001f310 Webpages | \U0001f441\ufe0f Vision (OpenAI)
    """)

    env_status = gr.Markdown(get_env_status())

    with gr.Tabs():
        with gr.TabItem("\U0001f9ea Test Single"):
            test_btn = gr.Button("Fetch & Solve Random Question", variant="primary")
            test_q = gr.Textbox(label="Question", lines=4, interactive=False)
            test_a = gr.Textbox(label="Answer", lines=2, interactive=False)
            test_id = gr.Textbox(label="Task ID", interactive=False)
            test_status = gr.Textbox(label="Status", interactive=False)
            test_btn.click(test_single_question,
                           outputs=[test_q, test_a, test_id, test_status])

        with gr.TabItem("\U0001f680 Full Benchmark"):
            run_btn = gr.Button("Run on All Questions", variant="primary")
            results_df = gr.Dataframe(label="Results")
            answers_state = gr.State()
            run_btn.click(run_agent_on_questions,
                          outputs=[results_df, answers_state])

        with gr.TabItem("\U0001f4e4 Submit"):
            gr.Markdown("### Submit to Leaderboard")
            with gr.Row():
                username_in = gr.Textbox(label="HF Username", placeholder="your-username")
                space_url_in = gr.Textbox(label="Space URL", placeholder="https://huggingface.co/spaces/you/space")
            answers_in = gr.Textbox(label="Answers JSON (auto-filled)", lines=8)
            submit_btn = gr.Button("Submit", variant="primary")
            submit_result = gr.Markdown()

            def format_answers(a):
                """Pretty-print the answers list into the submission textbox."""
                return json.dumps(a, indent=2) if a else ""

            answers_state.change(format_answers,
                                 inputs=[answers_state], outputs=[answers_in])
            submit_btn.click(submit_to_leaderboard,
                             inputs=[username_in, space_url_in, answers_in],
                             outputs=[submit_result])

    gr.Markdown("""
    ---
    **Setup:**
    - Local: `ollama serve` + `ollama pull qwen2.5:32b`
    - Production: Set `OPENAI_API_KEY` in `.env` or HF Secrets
    """)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)