Final_Assignment_Template

Sleeping

App Files Files Community

mrhenu committed on Jul 8, 2025

Commit

4868771

verified ·

1 Parent(s): 76f87fc

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -32

app.py CHANGED Viewed

@@ -1,42 +1,50 @@
 import os
-import gradio as gr
 import requests
 import pandas as pd
-from typing import TypedDict, Annotated, Sequence
 import operator
 from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
 from langchain.agents import AgentExecutor
 from langchain_experimental.tools import PythonREPLTool
-from langchain_community.tools.youtube.search import YouTubeSearchTool
 from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain_core.tools import tool
 from langchain_openai import ChatOpenAI
 from langgraph.graph import StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
-# --- Custom Image Analysis Tool ---------------------------------------------
 @tool("image_analysis", return_direct=True)
 def image_analysis(image_path: str, prompt: str) -> str:
-    """Analyze an image located at image_path and answer according to prompt.
-    image_path: path or URL to the image file
-    prompt: the specific question or instruction about the image
     Returns a textual answer.
     """
-    from PIL import Image
     import openai
     if not os.path.exists(image_path):
         return "Image path not found."
-    # Load image bytes
     with open(image_path, "rb") as f:
         img_bytes = f.read()
-    # Send to OpenAI vision-capable model (e.g., gpt-4o with vision)
     client = openai.OpenAI()
-    response = client.chat.completions.create(
-        model="gpt-4o-mini",  # vision-capable
         messages=[
             {
                 "role": "user",
@@ -47,46 +55,50 @@ def image_analysis(image_path: str, prompt: str) -> str:
             }
         ],
     )
-    return response.choices[0].message.content.strip()
-# --- Main Application Logic --------------------------------------------------
 class AgentState(TypedDict):
-    """State schema for the LangGraph agent."""
     messages: Annotated[Sequence[BaseMessage], operator.add]
-def create_langgraph_agent():
-    print("Initializing Advanced LangGraph Agent with vision…")
-    SYSTEM_PROMPT = """
-You are a general AI assistant for the GAIA test. I will ask you a question. Report your reasoning briefly, and finish with:
-FINAL ANSWER: [YOUR FINAL ANSWER]
-Follow the formatting rules strictly.
-"""
     llm = ChatOpenAI(model="gpt-4o", temperature=0)
     tools = [
         TavilySearchResults(max_results=3),
         PythonREPLTool(),
         YouTubeSearchTool(),
-        image_analysis,  # new vision tool
     ]
     # Optional FileManagement tools
     try:
         from langchain_community.agent_toolkits.file_management.toolkit import FileManagementToolkit
         tools.extend(FileManagementToolkit(root_dir=".").get_tools())
-    except Exception:
-        pass
     llm_with_tools = llm.bind_tools(tools)
-    def agent_node(state):
-        msgs = [SystemMessage(content=SYSTEM_PROMPT)] + list(state["messages"])
-        reply = llm_with_tools.invoke(msgs)
-        return {"messages": [reply]}
     graph = StateGraph(AgentState)
     graph.add_node("agent", agent_node)
@@ -95,6 +107,85 @@ Follow the formatting rules strictly.
     graph.add_conditional_edges("agent", tools_condition)
     graph.add_edge("tools", "agent")
-    return graph.compile()
-# rest of app (run_agent, Gradio UI, evaluation) remains identical to V2

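+"""Full Hugging Face Spaces app.py for the GAIA agent – includes an image analysis tool.
+Copy‑paste this file as‑is to your Space.
+Requires:
+  - openai>=1.7.0  (for vision)
+  - langchain, langchain-community, langchain-openai, langchain-experimental, langgraph,
+    gradio, pandas, requests, tavily-python, youtube-search, youtube-transcript-api
+  - Pillow (installed automatically with Gradio)
+Environment:
+  - OPENAI_API_KEY and TAVILY_API_KEY must be set (checked before the evaluation run starts).
+"""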
 import os
 import requests
 import pandas as pd
+import gradio as gr
 import operator
+from typing import Sequence, Annotated, TypedDict
 from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
 from langchain.agents import AgentExecutor
 from langchain_experimental.tools import PythonREPLTool
 from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_community.tools.youtube.search import YouTubeSearchTool
 from langchain_openai import ChatOpenAI
 from langgraph.graph import StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
+# ------------------------  Vision Tool  --------------------------------------
+from langchain_core.tools import tool
 @tool("image_analysis", return_direct=True)
 def image_analysis(image_path: str, prompt: str) -> str:
+    """Analyze an image located at `image_path` according to `prompt`.
+    Example call from LLM: image_analysis{"image_path": "/mnt/data/cat.png", "prompt": "How many cats?"}
     Returns a textual answer.
     """
     import openai
+    from PIL import Image
     if not os.path.exists(image_path):
         return "Image path not found."
+    # Read image bytes
     with open(image_path, "rb") as f:
         img_bytes = f.read()
     client = openai.OpenAI()
+    completion = client.chat.completions.create(
+        model="gpt-4o-mini",  # vision‑capable
         messages=[
             {
                 "role": "user",
             }
         ],
     )
+    return completion.choices[0].message.content.strip()
+# ---------------------  LangGraph Agent  -------------------------------------
 class AgentState(TypedDict):
     messages: Annotated[Sequence[BaseMessage], operator.add]
+SYSTEM_PROMPT = (
+    "You are a general AI assistant. I will ask you a question. Report your thoughts, "
+    "and finish your answer with the template:\nFINAL ANSWER: [YOUR FINAL ANSWER].\n\n"
+    "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.\n"
+    "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.\n"
+    "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.\n"
+    "If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string."
+)
+def create_langgraph_agent() -> AgentExecutor:
+    print("Initializing LangGraph GAIA agent…")
     llm = ChatOpenAI(model="gpt-4o", temperature=0)
+    # Base tools
     tools = [
         TavilySearchResults(max_results=3),
         PythonREPLTool(),
         YouTubeSearchTool(),
+        image_analysis,
     ]
     # Optional FileManagement tools
     try:
         from langchain_community.agent_toolkits.file_management.toolkit import FileManagementToolkit
         tools.extend(FileManagementToolkit(root_dir=".").get_tools())
+        print("FileManagement tools loaded.")
+    except Exception as e:
+        print("FileManagement toolkit unavailable:", e)
     llm_with_tools = llm.bind_tools(tools)
+    def agent_node(state: AgentState):
+        full_msgs = [SystemMessage(content=SYSTEM_PROMPT)] + list(state["messages"])
+        response = llm_with_tools.invoke(full_msgs)
+        return {"messages": [response]}
     graph = StateGraph(AgentState)
     graph.add_node("agent", agent_node)
     graph.add_conditional_edges("agent", tools_condition)
     graph.add_edge("tools", "agent")
+    executor = graph.compile()
+    print("LangGraph agent compiled.")
+    return executor
+# ---------------------  Helper to run one question ---------------------------
+def run_agent(agent_executor, question: str) -> str:
+    """Run the agent on a single question and return its answer text.
+
+    Args:
+        agent_executor: Object exposing ``invoke(state, config=...)``; in this
+            file it is the value returned by ``create_langgraph_agent()``.
+        question: The question text, sent as a single ``HumanMessage``.
+
+    Returns:
+        The text following the last ``"FINAL ANSWER:"`` marker (whitespace
+        stripped) when the marker is present in the final message; otherwise
+        the final message content unchanged. If anything raises, the string
+        ``"Error: <exception>"`` is returned instead of propagating.
+    """
+    print("New question:", question)
+    try:
+        # recursion_limit is passed through to the executor's invoke config;
+        # exceeding it surfaces as an exception handled below.
+        result = agent_executor.invoke(
+            {"messages": [HumanMessage(content=question)]},
+            config={"recursion_limit": 15},
+        )
+        # Only the last message in the returned state is treated as the answer.
+        # NOTE(review): assumes `.content` is a str — TODO confirm; a non-string
+        # content would make the `in` / `.split` below raise and yield "Error: ...".
+        answer_raw = result["messages"][-1].content
+        # split(...)[-1] keeps the text after the LAST marker if it appears more than once.
+        return answer_raw.split("FINAL ANSWER:")[-1].strip() if "FINAL ANSWER:" in answer_raw else answer_raw
+    except Exception as err:
+        # Best-effort: a failing question becomes an error string so the
+        # caller's loop over all questions can continue.
+        print("Execution error:", err)
+        return f"Error: {err}"
+# ---------------------  Evaluation / Submission ----------------------------
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Fetch all questions, answer each with the agent, and submit the answers.
+
+    Args:
+        profile: The logged-in user's OAuth profile, or ``None`` when nobody
+            is logged in. Only ``profile.username`` is read.
+
+    Returns:
+        A ``(status_message, table)`` tuple. ``table`` is ``None`` for every
+        failure that happens before answers are collected (no login, missing
+        API keys, agent initialisation error, question fetch error), and a
+        ``pandas.DataFrame`` built from the answer dicts (``task_id``,
+        ``submitted_answer``) once answers exist, whether or not the
+        submission itself succeeds.
+    """
+    # NOTE(review): os.getenv returns None when SPACE_ID is unset; the f-string
+    # below would then produce ".../spaces/None/tree/main" as the agent_code URL.
+    space_id = os.getenv("SPACE_ID")
+    if not profile:
+        return "Please login via the button.", None
+    # Both keys are required up front so the run fails fast instead of per question.
+    if not (os.getenv("TAVILY_API_KEY") and os.getenv("OPENAI_API_KEY")):
+        return "Missing API keys (TAVILY / OPENAI)", None
+    try:
+        agent_exec = create_langgraph_agent()
+    except Exception as e:
+        return f"Error initializing agent: {e}", None
+    QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
+    SUBMIT_URL = "https://agents-course-unit4-scoring.hf.space/submit"
+    try:
+        q_resp = requests.get(QUESTIONS_URL, timeout=20)
+        q_resp.raise_for_status()
+        questions = q_resp.json()
+    except Exception as e:
+        return f"Error fetching questions: {e}", None
+    answers = []
+    for item in questions:
+        tid, qtext = item.get("task_id"), item.get("question")
+        # Items missing either field are silently skipped.
+        if tid and qtext:
+            # run_agent never raises; failures are recorded as "Error: ..." answers.
+            answers.append({"task_id": tid, "submitted_answer": run_agent(agent_exec, qtext)})
+    payload = {
+        "username": profile.username.strip(),
+        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
+        "answers": answers,
+    }
+    try:
+        # Longer timeout than the question fetch (240 s vs 20 s).
+        s_resp = requests.post(SUBMIT_URL, json=payload, timeout=240)
+        s_resp.raise_for_status()
+        r = s_resp.json()
+        status = (
+            f"Submission Successful!\nUser: {r.get('username')}\n"
+            f"Score: {r.get('score', 'N/A')}% ({r.get('correct_count', '?')}/{r.get('total_attempted', '?')})\n"
+            f"Message: {r.get('message', 'No message')}"
+        )
+        return status, pd.DataFrame(answers)
+    except Exception as e:
+        # The collected answers are still returned so they can be inspected
+        # even though the submission failed.
+        return f"Error submitting answers: {e}", pd.DataFrame(answers)
+# ------------------------  Gradio UI  ---------------------------------------
+# Page layout: title, login button, a single run button, and two outputs
+# (status text and the answers table).
+with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner (Vision‑enabled)")
+    gr.LoginButton()
+    run_btn = gr.Button("Run & Submit All Answers")
+    status_out = gr.Textbox(label="Run Status", lines=5, interactive=False)
+    table_out = gr.DataFrame(label="Questions / Answers", wrap=True)
+    # No explicit `inputs` are wired here; the handler's `profile` argument is
+    # presumably injected by Gradio from the login session via its
+    # gr.OAuthProfile annotation — confirm against the Gradio OAuth docs.
+    # The handler's (status, table) return maps onto the two outputs in order.
+    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])
+# Launch the UI only when executed as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()