Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| import tempfile | |
| import json | |
| import logging | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| from agent_enhanced import GAIAAgent, is_ollama_available, is_production | |
| DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
def _print_questions(questions: list) -> None:
    """Pretty-print every question with its task ID to stdout (debug aid)."""
    print("\n" + "=" * 80)
    print("ALL QUESTIONS WITH TASK IDs:")
    print("=" * 80)
    for i, q in enumerate(questions, 1):
        task_id = q.get("task_id", "N/A")
        question_text = q.get("question", "N/A")
        file_name = q.get("file_name", "")
        print(f"\n[{i}] Task ID: {task_id}")
        # Truncate long questions to keep the listing readable.
        print(f" Question: {question_text[:200]}{'...' if len(question_text) > 200 else ''}")
        if file_name:
            print(f" File: {file_name}")
    print("\n" + "=" * 80)
    print(f"Total questions: {len(questions)}")
    print("=" * 80 + "\n")


def fetch_questions(api_url: str = DEFAULT_API_URL) -> list:
    """Fetch all questions from the GAIA API.

    Retries up to 3 times on any request/JSON failure; returns an empty
    list when every attempt fails (callers treat [] as "fetch failed").
    """
    for attempt in range(3):
        try:
            response = requests.get(f"{api_url}/questions", timeout=30)
            response.raise_for_status()
            questions = response.json()
            _print_questions(questions)
            return questions
        except Exception as e:
            # Lazy %-args so the message is only built when emitted.
            logger.warning("Attempt %d failed: %s", attempt + 1, e)
    return []
def fetch_random_question(api_url: str = DEFAULT_API_URL) -> dict:
    """Fetch a single random question from the GAIA API.

    Retries up to 3 times; returns an empty dict when every attempt fails.
    """
    attempt = 0
    while attempt < 3:
        try:
            resp = requests.get(f"{api_url}/random-question", timeout=30)
            resp.raise_for_status()
            return resp.json()
        except Exception as exc:
            logger.warning(f"Attempt {attempt + 1} failed: {exc}")
        attempt += 1
    return {}
def fetch_file(task_id: str, api_url: str = DEFAULT_API_URL) -> Optional[str]:
    """Download the attachment for *task_id* into a fresh temp directory.

    Returns the local file path on success, or None when the task has no
    file (HTTP 404) or the download fails for any reason.
    """
    try:
        response = requests.get(f"{api_url}/files/{task_id}", timeout=30)
        if response.status_code == 200:
            content_disposition = response.headers.get('content-disposition', '')
            filename = f"task_{task_id}_file"
            if 'filename=' in content_disposition:
                # Keep only the filename parameter value: drop any trailing
                # header parameters and surrounding quotes, then basename()
                # to guard against path traversal from an untrusted server.
                raw = content_disposition.split('filename=')[1].split(';')[0]
                candidate = os.path.basename(raw.strip().strip('"'))
                if candidate:
                    filename = candidate
            temp_dir = tempfile.mkdtemp()
            file_path = os.path.join(temp_dir, filename)
            with open(file_path, 'wb') as f:
                f.write(response.content)
            logger.info(f"Downloaded: {file_path}")
            return file_path
        elif response.status_code == 404:
            # 404 means "this task has no attachment" -- not an error.
            return None
        else:
            # Previously this fell through silently; surface odd statuses.
            logger.warning(f"Unexpected status {response.status_code} fetching file for task {task_id}")
    except Exception as e:
        logger.error(f"File fetch failed: {e}")
    return None
def submit_answers(username: str, agent_code: str, answers: list, api_url: str = DEFAULT_API_URL) -> dict:
    """POST the collected answers to the scoring endpoint and return its JSON reply.

    Raises requests.HTTPError (via raise_for_status) on a non-2xx response.
    """
    body = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    resp = requests.post(f"{api_url}/submit", json=body, timeout=60)
    resp.raise_for_status()
    return resp.json()
def get_env_status() -> str:
    """Describe which LLM backend the current environment will use.

    Checks, in priority order: production (HF Spaces), a local Ollama
    server, an OPENAI_API_KEY env var, then falls back to "no backend".
    """
    if is_production():
        return "βοΈ **Production Mode** (HuggingFace Spaces) - Using OpenAI GPT-4o"
    if is_ollama_available():
        return "π **Local Mode** - Using Ollama"
    if os.environ.get("OPENAI_API_KEY"):
        return "βοΈ **Local + OpenAI** - Using OpenAI GPT-4o"
    return "β οΈ **No Backend** - Set OPENAI_API_KEY or start Ollama"
def run_agent_on_questions(progress=gr.Progress()):
    """Run the agent on every benchmark question.

    Returns a (results DataFrame, answers list) pair on success, or an
    ("Error: ..." string, None) pair on failure. The answers list is in
    the shape the /submit endpoint expects.
    """
    try:
        progress(0, desc="Initializing agent...")
        agent = GAIAAgent()
        progress(0.05, desc="Fetching questions...")
        questions = fetch_questions()
        if not questions:
            return "Error: Failed to fetch questions.", None
        total = len(questions)
        results = []
        answers_for_submission = []
        for i, q in enumerate(questions):
            progress((i + 1) / total, desc=f"Question {i+1}/{total}...")
            task_id = q.get("task_id", "")
            question_text = q.get("question", "")
            file_path = None
            if q.get("file_name"):
                file_path = fetch_file(task_id)
            try:
                answer = agent.run(question_text, task_id, file_path)
            except Exception as e:
                # One failing question must not abort the whole run.
                logger.error(f"Error on question {i+1}: {e}")
                answer = f"Error: {str(e)}"
            results.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": answer,
                "Status": "β" if answer and not answer.startswith("Error:") and answer != "Unable to determine answer" else "β",
            })
            answers_for_submission.append({
                "task_id": task_id,
                "submitted_answer": answer,
            })
            # Best-effort cleanup of the downloaded file and its temp dir.
            if file_path and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    os.rmdir(os.path.dirname(file_path))
                except OSError:
                    # Cleanup failure must not interrupt the benchmark.
                    pass
        df = pd.DataFrame(results)
        progress(1.0, desc="Complete!")
        return df, answers_for_submission
    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", None
def test_single_question():
    """Fetch one random question, run the agent on it, and report the result.

    Returns (question, answer, task_id, status) strings for the Gradio UI;
    on failure the question slot carries the "Error: ..." message and the
    other slots are empty.
    """
    try:
        agent = GAIAAgent()
        question_data = fetch_random_question()
        if not question_data:
            return "Error: Failed to fetch question.", "", "", ""
        task_id = question_data.get("task_id", "")
        question_text = question_data.get("question", "")
        file_path = None
        if question_data.get("file_name"):
            file_path = fetch_file(task_id)
        answer = agent.run(question_text, task_id, file_path)
        # Best-effort cleanup of the downloaded file and its temp dir.
        if file_path and os.path.exists(file_path):
            try:
                os.remove(file_path)
                os.rmdir(os.path.dirname(file_path))
            except OSError:
                # Cleanup failure should not mask the agent's answer.
                pass
        status = "β Valid" if answer and not answer.startswith("Error") else "β οΈ Check answer"
        return question_text, answer, task_id, status
    except Exception as e:
        logger.error(f"Error: {e}")
        return f"Error: {str(e)}", "", "", ""
def submit_to_leaderboard(username: str, space_url: str, answers_json: str):
    """Submit stored answers to the course leaderboard and format the outcome."""
    # All three inputs are required; answers come from a prior benchmark run.
    if not username or not space_url or not answers_json:
        return "Please fill in all fields and run the agent first."
    try:
        if isinstance(answers_json, str):
            answers = json.loads(answers_json)
        else:
            answers = answers_json
        if not isinstance(answers, list) or not answers:
            return "Error: Run the benchmark first."
        # The scoring API expects a link to the Space's source tree.
        if not space_url.endswith("/tree/main"):
            space_url = space_url.rstrip("/") + "/tree/main"
        result = submit_answers(username, space_url, answers)
        print(result)
        score = result.get("score", 0)
        correct = result.get("correct_count", 0)
        total = result.get("total_attempted", 0)
        # NOTE(review): score is rendered as a fraction ({score:.1%}) yet
        # compared against 0.3 -- confirm the API returns a 0..1 value.
        cert_msg = (
            "π **Congratulations!** Score above 30% - Certificate earned!"
            if score > 0.3
            else "β Need >30% for certificate."
        )
        return f"""
## Submission Results
**Score:** {score:.1%}
**Correct:** {correct}/{total}
{cert_msg}
[View Leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard)
"""
    except Exception as e:
        logger.error(f"Submission error: {e}")
        return f"Error: {str(e)}"
# ============ GRADIO APP ============
# Three-tab UI: single-question smoke test, full benchmark run, and
# leaderboard submission. Handlers above are wired in declaration order.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# π€ GAIA Benchmark Agent
**Tools:** π Web Search | π Wikipedia | π Python | π Files | π’ Calculator | π Webpages | ποΈ Vision (OpenAI)
""")
    # Static banner showing which LLM backend was detected at startup.
    env_status = gr.Markdown(get_env_status())
    with gr.Tabs():
        with gr.TabItem("π§ͺ Test Single"):
            test_btn = gr.Button("Fetch & Solve Random Question", variant="primary")
            test_q = gr.Textbox(label="Question", lines=4, interactive=False)
            test_a = gr.Textbox(label="Answer", lines=2, interactive=False)
            test_id = gr.Textbox(label="Task ID", interactive=False)
            test_status = gr.Textbox(label="Status", interactive=False)
            # test_single_question returns (question, answer, task_id, status).
            test_btn.click(test_single_question, outputs=[test_q, test_a, test_id, test_status])
        with gr.TabItem("π Full Benchmark"):
            run_btn = gr.Button("Run on All Questions", variant="primary")
            results_df = gr.Dataframe(label="Results")
            # Holds the answers list so the Submit tab can reuse it.
            answers_state = gr.State()
            run_btn.click(run_agent_on_questions, outputs=[results_df, answers_state])
        with gr.TabItem("π€ Submit"):
            gr.Markdown("### Submit to Leaderboard")
            with gr.Row():
                username_in = gr.Textbox(label="HF Username", placeholder="your-username")
                space_url_in = gr.Textbox(label="Space URL", placeholder="https://huggingface.co/spaces/you/space")
            answers_in = gr.Textbox(label="Answers JSON (auto-filled)", lines=8)
            submit_btn = gr.Button("Submit", variant="primary")
            submit_result = gr.Markdown()

            def format_answers(a):
                """Serialize the benchmark answers for display in the textbox."""
                return json.dumps(a, indent=2) if a else ""

            # Mirror benchmark results into the JSON textbox whenever the
            # shared state changes, so the user can inspect before submitting.
            answers_state.change(format_answers, inputs=[answers_state], outputs=[answers_in])
            submit_btn.click(submit_to_leaderboard, inputs=[username_in, space_url_in, answers_in], outputs=[submit_result])
    gr.Markdown("""
---
**Setup:**
- Local: `ollama serve` + `ollama pull qwen2.5:32b`
- Production: Set `OPENAI_API_KEY` in `.env` or HF Secrets
""")
if __name__ == "__main__":
    # Bind to all interfaces on 7860, the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)