Spaces:
Sleeping
Sleeping
| """Full app.py with improved Excel-handling guidelines for GAIA agent. | |
| Copy/paste into your Hugging Face Space. | |
| """ | |
| import os | |
| import requests | |
| import pandas as pd | |
| import gradio as gr | |
| import operator | |
| from typing import Sequence, Annotated, TypedDict | |
| from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage | |
| from langchain_experimental.tools import PythonREPLTool | |
| from langchain_community.tools.tavily_search import TavilySearchResults | |
| from langchain_community.tools.youtube.search import YouTubeSearchTool | |
| from langchain_openai import ChatOpenAI | |
| from langgraph.graph import StateGraph | |
| from langgraph.prebuilt import ToolNode, tools_condition | |
| # ----------------------- Agent Definition ---------------------------------- | |
class AgentState(TypedDict):
    """Graph state shared by the agent and tool nodes.

    The only key is ``messages``. Its ``Annotated`` metadata carries
    ``operator.add``, which LangGraph uses as the reducer for this key, so a
    node's returned message list is appended to the running history.
    """

    messages: Annotated[Sequence[BaseMessage], operator.add]
# System prompt prepended to every model call. It fixes the answer template
# ("FINAL ANSWER: ...") that the answer-extraction code looks for, and tells
# the model which tool to use for spreadsheets, web look-ups, videos and math.
SYSTEM_PROMPT = "".join(
    (
        # Role and output template.
        "You are a GAIA evaluation agent. For each question, think step‑by‑step, but only output the final answer with the template:\n",
        "FINAL ANSWER: [YOUR FINAL ANSWER]\n\n",
        "Formatting rules: Your FINAL ANSWER must be a single number, a single short string, or a comma‑separated list, as the task dictates. No extra words.\n\n",
        # Tool guidance, starting with the Excel / CSV workflow.
        "**IMPORTANT TOOL USAGE**:\n",
        "• You have a PythonREPL tool with pandas pre‑installed. If the task references an Excel / CSV file path (e.g. .xlsx, .xls, .csv), do the following:\n",
        " 1. Call PythonREPL and load the file with `pd.read_excel(<path>)` or `pd.read_csv(<path>)`.\n",
        " 2. Use pandas operations (sum, mean, filtering etc.) to compute the required value.\n",
        " 3. Return the numeric/string result in the FINAL ANSWER template.\n\n",
        "• Use TavilySearchResults for web look‑ups, YouTubeSearchTool for video queries.\n",
        "• If the task involves code execution or math, use PythonREPL.\n",
    )
)
def create_langgraph_agent():
    """Build and compile the tool-calling agent graph.

    The graph has two nodes: ``agent`` (a ``gpt-4o`` chat model with the
    tools bound) and ``tools`` (a ``ToolNode`` executing requested tool
    calls). ``tools_condition`` decides after each model reply whether to
    visit ``tools``; after ``tools`` runs, control returns to ``agent``.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.

    Raises:
        Whatever the model or tool constructors raise (for example when API
        credentials are missing). Failure to load the optional file-management
        toolkit is logged and does not raise.
    """
    import logging

    llm = ChatOpenAI(model="gpt-4o", temperature=0)
    tools = [
        TavilySearchResults(max_results=3),
        PythonREPLTool(),
        YouTubeSearchTool(),
    ]

    # Optional FileManagement toolkit: best-effort, the agent works without
    # it. Log the reason instead of swallowing it silently so a missing
    # dependency or a constructor error is visible in the Space logs.
    try:
        from langchain_community.agent_toolkits.file_management.toolkit import (
            FileManagementToolkit,
        )

        tools.extend(FileManagementToolkit(root_dir=".").get_tools())
    except Exception as exc:
        logging.getLogger(__name__).warning(
            "FileManagementToolkit unavailable; continuing without it: %s", exc
        )

    llm_with_tools = llm.bind_tools(tools)

    def agent_node(state: AgentState):
        """Call the model on the system prompt plus the message history."""
        msgs = [SystemMessage(content=SYSTEM_PROMPT)] + list(state["messages"])
        reply = llm_with_tools.invoke(msgs)
        # Returned as a one-element list; the state's reducer appends it.
        return {"messages": [reply]}

    graph = StateGraph(AgentState)
    graph.add_node("agent", agent_node)
    graph.add_node("tools", ToolNode(tools))
    graph.set_entry_point("agent")
    graph.add_conditional_edges("agent", tools_condition)
    graph.add_edge("tools", "agent")
    return graph.compile()
| # ------------------ Helper to run one question ----------------------------- | |
def run_agent(agent_exec, question: str) -> str:
    """Run the agent on one question and return its final answer text.

    Args:
        agent_exec: Object exposing ``invoke(state, config=...)`` that returns
            a mapping whose ``"messages"`` list ends with the model's reply.
        question: The question text sent as the single human message.

    Returns:
        The text after the last ``"FINAL ANSWER:"`` marker in the final
        message, or the whole message when the marker is absent. Surrounding
        whitespace is stripped in both cases. If anything raises, the string
        ``"Error: <exception>"`` is returned instead of propagating.
    """
    marker = "FINAL ANSWER:"
    try:
        result = agent_exec.invoke(
            {"messages": [HumanMessage(content=question)]},
            config={"recursion_limit": 15},
        )
        text = result["messages"][-1].content
        # rpartition keeps whatever follows the LAST marker; `found` is empty
        # when the marker never appears, in which case the full reply is used.
        _, found, answer = text.rpartition(marker)
        return answer.strip() if found else text.strip()
    except Exception as e:
        return f"Error: {e}"
| # ------------------ Evaluation & Submission -------------------------------- | |
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with the agent, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status, table)`` pair: ``status`` is a human-readable message and
        ``table`` is a ``pandas.DataFrame`` of the submitted answers, or
        ``None`` when the run stopped before any answer was produced.
    """
    if not profile:
        return "Please login first.", None

    for key in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
        if not os.getenv(key):
            return f"Missing {key} env var.", None

    try:
        agent_exec = create_langgraph_agent()
    except Exception as e:
        return f"Init error: {e}", None

    Q_URL = "https://agents-course-unit4-scoring.hf.space/questions"
    S_URL = "https://agents-course-unit4-scoring.hf.space/submit"

    try:
        response = requests.get(Q_URL, timeout=20)
        # Without this, an HTTP error page carrying a JSON body would be
        # mistaken for the question list.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Fetch error: {e}", None

    # Iterating a non-list payload (e.g. an error object) would crash below.
    if not isinstance(questions, list):
        return "Fetch error: unexpected response format (expected a list).", None

    # Skip malformed entries: non-dicts and items missing an id or a question.
    answers = [
        {
            "task_id": q["task_id"],
            "submitted_answer": run_agent(agent_exec, q["question"]),
        }
        for q in questions
        if isinstance(q, dict) and q.get("task_id") and q.get("question")
    ]
    if not answers:
        return "No valid questions received; nothing to submit.", None

    payload = {
        "username": profile.username,
        "agent_code": "HF_Space_Link",  # not required for scoring
        "answers": answers,
    }

    try:
        response = requests.post(S_URL, json=payload, timeout=240)
        # Report a rejected submission as an error instead of formatting a
        # misleading "Score: N/A%" line from the error body.
        response.raise_for_status()
        res = response.json()
        status = (
            f"Score: {res.get('score', 'N/A')}% ({res.get('correct_count')}/"
            f"{res.get('total_attempted')})\nMessage: {res.get('message', '')}"
        )
        return status, pd.DataFrame(answers)
    except Exception as e:
        # Still show the answers so a failed submission does not lose them.
        return f"Submit error: {e}", pd.DataFrame(answers)
| # ----------------------------- UI ----------------------------------------- | |
# Page layout: title, login button, one trigger button and two output widgets.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Runner – Excel‑aware")
    gr.LoginButton()

    run_button = gr.Button("Run & Submit")
    status_box = gr.Textbox(label="Status", lines=4)
    answers_table = gr.DataFrame(label="Answers", wrap=True)

    # No explicit inputs are wired; the handler's two return values fill the
    # status box and the answers table, in that order.
    # NOTE(review): the handler's `profile` argument is presumably supplied by
    # Gradio's OAuth integration from its type annotation - confirm in docs.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_box, answers_table],
    )

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()