Spaces:
Sleeping
Sleeping
# Standard library
import asyncio
import os
from typing import Optional

# Third-party
import gradio as gr
import pandas as pd
import requests

# Local / project
from langchain_core.messages import HumanMessage
from langgraph_new import graph  # Your graph agent
# ---- Module-level configuration ----

# Base URL of the GAIA scoring service; used by both the question fetch
# and the answer submission endpoints.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Per-username cache of generated answers: filled by run_agent, read by
# submit_answers. Lives only for the lifetime of this process.
user_answers_cache = {}
class GaiaAgent:
    """Thin callable wrapper around the LangGraph ``graph`` agent.

    Calling an instance with a question string runs the graph once and
    returns the stripped text of the final message, or an error string
    if anything in the invocation path raises.
    """

    def __init__(self):
        print("Graph-based agent initialized.")

    def __call__(self, question: str) -> str:
        print("Received question:", question)
        # Keep the whole call path inside the try so any failure —
        # invocation, message extraction, or stripping — is reported
        # as a string rather than propagated to the caller.
        try:
            state = graph.invoke({"messages": [HumanMessage(content=question)]})
            msgs = state.get("messages", [])
            return msgs[-1].content.strip() if msgs else "No messages returned."
        except Exception as e:
            return f"ERROR invoking graph: {e}"
# Async runner
async def run_agent(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, answer them concurrently, and cache results.

    Args:
        profile: OAuth profile injected by Gradio's LoginButton, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` pair for the Gradio outputs, where
        ``results`` is a DataFrame of answers on success or ``None`` on error.
    """
    if not profile:
        return "Please login to Hugging Face.", None
    # Strip whitespace so the cache key matches the lookup in
    # submit_answers, which also uses profile.username.strip().
    username = profile.username.strip()
    agent = GaiaAgent()

    # 1. Load questions. requests is blocking, so run it in a worker
    # thread instead of stalling the asyncio event loop this coroutine
    # is scheduled on.
    try:
        response = await asyncio.to_thread(
            requests.get, f"{DEFAULT_API_URL}/questions", timeout=10
        )
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # 2. Process questions — one worker thread per question, gathered
    # concurrently. Failures are recorded per-item, never raised.
    async def process(item):
        task_id = item.get("task_id")
        question = item.get("question")
        try:
            answer = await asyncio.to_thread(agent, question)
            return {"task_id": task_id, "question": question, "submitted_answer": answer}
        except Exception as e:
            return {"task_id": task_id, "question": question, "submitted_answer": f"ERROR: {e}"}

    results = await asyncio.gather(*(process(item) for item in questions_data))
    user_answers_cache[username] = results
    df = pd.DataFrame(results)
    return f"Answered {len(results)} questions. Ready to submit.", df
# Submission
def submit_answers(profile: gr.OAuthProfile | None):
    """Submit the logged-in user's cached answers to the scoring API.

    Returns a ``(status_text, results)`` pair for the Gradio outputs; the
    DataFrame echoes the cached answers whether or not submission succeeds.
    """
    # Guard clauses: must be logged in and must have run the agent first.
    if not profile:
        return "Please login to Hugging Face.", None
    username = profile.username.strip()
    if username not in user_answers_cache:
        return "No cached answers. Please run the agent first.", None

    cached = user_answers_cache[username]
    answers_payload = [
        {"task_id": row["task_id"], "submitted_answer": row["submitted_answer"]}
        for row in cached
    ]

    # Link the submission back to this Space's source tree when SPACE_ID
    # is set (i.e. when running on Hugging Face infrastructure).
    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()
        final_status = (
            f"β Submission Successful!\n"
            f"π€ User: {result.get('username')}\n"
            f"π― Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"π© Message: {result.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(cached)
    except Exception as e:
        return f"β Submission failed: {e}", pd.DataFrame(cached)
# ---------- Gradio UI ----------
with gr.Blocks() as demo:
    gr.Markdown("# π§ GAIA Agent Evaluation")
    gr.LoginButton()

    run_button = gr.Button("βΆοΈ Run Agent on GAIA Questions")
    submit_button = gr.Button("π€ Submit Cached Answers")

    status = gr.Textbox(label="Status", lines=6, interactive=False)
    results = gr.DataFrame(label="Answers", wrap=True)

    # NOTE(review): no inputs are wired — Gradio injects the OAuth profile
    # automatically because the handlers annotate a gr.OAuthProfile
    # parameter; confirm this matches the installed Gradio version.
    run_button.click(run_agent, outputs=[status, results])
    submit_button.click(submit_answers, outputs=[status, results])
if __name__ == "__main__":
    # Script entry point: start the app with verbose error output and
    # without a public share link.
    print("Launching Gradio app...")
    demo.launch(debug=True, share=False)