mrhenu committed on
Commit
2c1cc68
·
verified ·
1 Parent(s): 662cd6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -123
app.py CHANGED
@@ -9,161 +9,92 @@ from langchain.agents import AgentExecutor
9
  from langchain_experimental.tools import PythonREPLTool
10
  from langchain_community.tools.youtube.search import YouTubeSearchTool
11
  from langchain_community.tools.tavily_search import TavilySearchResults
 
12
  from langchain_openai import ChatOpenAI
13
- from langgraph.graph import StateGraph, END
14
  from langgraph.prebuilt import ToolNode, tools_condition
15
 
16
- # --- Main Application Logic ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- # Agentin muisti
19
  class AgentState(TypedDict):
 
20
  messages: Annotated[Sequence[BaseMessage], operator.add]
21
 
22
- # Agentin rakentajafunktio
23
  def create_langgraph_agent():
24
- print("Initializing Advanced LangGraph Agent…")
25
 
26
- # 1. System prompt GAIA-tyyliin
27
  SYSTEM_PROMPT = """
28
- You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
29
- FINAL ANSWER: [YOUR FINAL ANSWER].
30
-
31
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
32
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
33
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
34
- If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
35
  """
36
 
37
- # 2. LLM (ei system_message-parametria -> annetaan prompt SystemMessage-na)
38
  llm = ChatOpenAI(model="gpt-4o", temperature=0)
39
 
40
- # 3. Perustyökalut
41
  tools = [
42
  TavilySearchResults(max_results=3),
43
  PythonREPLTool(),
44
  YouTubeSearchTool(),
 
45
  ]
46
 
47
- # 4. Valinnainen FileManagement toolkit (kevyt, yleensä saatavilla)
48
  try:
49
  from langchain_community.agent_toolkits.file_management.toolkit import FileManagementToolkit
50
- file_toolkit = FileManagementToolkit(root_dir=".")
51
- tools.extend(file_toolkit.get_tools())
52
- print("FileManagement tools loaded.")
53
- except Exception as e:
54
- print("FileManagement toolkit unavailable:", e)
55
 
56
- # 5. Bind tools
57
  llm_with_tools = llm.bind_tools(tools)
58
- print("LLM and tools initialized.")
59
 
60
- # 6. Agent-solmu (lisää system prompt joka kierroksella)
61
  def agent_node(state):
62
- print("Calling agent node…")
63
- full_msgs = [SystemMessage(content=SYSTEM_PROMPT)] + list(state["messages"])
64
- reply = llm_with_tools.invoke(full_msgs)
65
  return {"messages": [reply]}
66
 
67
- # 7. Työkalusolmu
68
- tool_node = ToolNode(tools)
69
-
70
- # 8. Rakenna graafi
71
  graph = StateGraph(AgentState)
72
  graph.add_node("agent", agent_node)
73
- graph.add_node("tools", tool_node)
74
  graph.set_entry_point("agent")
75
  graph.add_conditional_edges("agent", tools_condition)
76
  graph.add_edge("tools", "agent")
77
 
78
- app = graph.compile()
79
- print("LangGraph agent compiled and ready.")
80
- return app
81
 
82
- # Agentin suoritusfunktio
83
- def run_agent(agent_executor, question: str) -> str:
84
- print(f"Agent received question: {question}")
85
- final_answer = ""
86
- try:
87
- response = agent_executor.invoke(
88
- {"messages": [HumanMessage(content=question)]},
89
- config={"recursion_limit": 15}
90
- )
91
- raw_answer = response['messages'][-1].content
92
- if "FINAL ANSWER:" in raw_answer:
93
- final_answer = raw_answer.split("FINAL ANSWER:")[-1].strip()
94
- else:
95
- final_answer = raw_answer
96
- except Exception as e:
97
- print(f"Error during agent execution: {e}")
98
- final_answer = f"Error: Agent failed to execute. {e}"
99
-
100
- print(f"Agent returning answer: {final_answer}")
101
- return str(final_answer)
102
-
103
- # Evaluaation ajaminen ja tulosten lähetys
104
-
105
- def run_and_submit_all(profile: gr.OAuthProfile | None):
106
- space_id = os.getenv("SPACE_ID")
107
- if not profile:
108
- return "Please login to Hugging Face.", None
109
- username = f"{profile.username}"
110
-
111
- if not os.getenv("TAVILY_API_KEY") or not os.getenv("OPENAI_API_KEY"):
112
- return "API keys missing (TAVILY_API_KEY, OPENAI_API_KEY)", None
113
-
114
- try:
115
- agent_executor = create_langgraph_agent()
116
- except Exception as e:
117
- return f"Error initializing agent: {e}", None
118
-
119
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
120
- questions_url = "https://agents-course-unit4-scoring.hf.space/questions"
121
-
122
- try:
123
- response = requests.get(questions_url, timeout=20)
124
- response.raise_for_status()
125
- questions_data = response.json()
126
- except Exception as e:
127
- return f"Error fetching questions: {e}", None
128
-
129
- answers_payload = []
130
- for item in questions_data:
131
- task_id, question_text = item.get("task_id"), item.get("question")
132
- if task_id and question_text:
133
- submitted_answer = run_agent(agent_executor, question_text)
134
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
135
-
136
- submission_data = {
137
- "username": username.strip(),
138
- "agent_code": agent_code,
139
- "answers": answers_payload,
140
- }
141
- submit_url = "https://agents-course-unit4-scoring.hf.space/submit"
142
-
143
- try:
144
- response = requests.post(submit_url, json=submission_data, timeout=240)
145
- response.raise_for_status()
146
- result_data = response.json()
147
- final_status = (
148
- f"Submission Successful!\n"
149
- f"User: {result_data.get('username')}\n"
150
- f"Overall Score: {result_data.get('score', 'N/A')}% "
151
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
152
- f"Message: {result_data.get('message', 'No message received.')}"
153
- )
154
- return final_status, pd.DataFrame(answers_payload)
155
- except Exception as e:
156
- return f"Error during submission: {e}", pd.DataFrame(answers_payload)
157
-
158
- # Gradio-käyttöliittymä
159
- with gr.Blocks() as demo:
160
- gr.Markdown("# Agent Evaluation Runner (GAIA Prompt)")
161
- gr.LoginButton()
162
- run_button = gr.Button("Run Evaluation & Submit All Answers")
163
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
164
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
165
-
166
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
167
-
168
- if __name__ == "__main__":
169
- demo.launch()
 
9
  from langchain_experimental.tools import PythonREPLTool
10
  from langchain_community.tools.youtube.search import YouTubeSearchTool
11
  from langchain_community.tools.tavily_search import TavilySearchResults
12
+ from langchain_core.tools import tool
13
  from langchain_openai import ChatOpenAI
14
+ from langgraph.graph import StateGraph
15
  from langgraph.prebuilt import ToolNode, tools_condition
16
 
17
+ # --- Custom Image Analysis Tool ---------------------------------------------
18
+
19
# NOTE(review): return_direct=True is kept from the original declaration. The
# graph built in this file adds an edge from "tools" back to "agent", so
# confirm whether returning the tool output directly is actually intended.
@tool("image_analysis", return_direct=True)
def image_analysis(image_path: str, prompt: str) -> str:
    """Analyze an image and answer a question about it.

    Args:
        image_path: Local file path or http(s) URL of the image.
        prompt: The specific question or instruction about the image.

    Returns:
        The model's textual answer, "Image path not found." when a local
        path does not point to an existing file, or an empty string when
        the model returns no text.
    """
    import base64
    import mimetypes

    import openai

    if image_path.startswith(("http://", "https://")):
        # Remote images are passed through by URL; nothing to read locally.
        image_url = image_path
    else:
        # isfile (not exists) so a directory is rejected here instead of
        # failing later inside open().
        if not os.path.isfile(image_path):
            return "Image path not found."

        with open(image_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("ascii")

        # The API takes local images as a base64 data URL; fall back to PNG
        # when the extension gives no hint about the media type.
        mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
        image_url = f"data:{mime_type};base64,{encoded}"

    client = openai.OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # vision-capable
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }
        ],
    )

    # message.content can be None (e.g. a refusal), so guard before strip().
    answer = response.choices[0].message.content
    return answer.strip() if answer else ""
51
+
52
+ # --- Main Application Logic --------------------------------------------------
53
 
 
54
class AgentState(TypedDict):
    """Typed dictionary describing the state passed between graph nodes."""

    # Conversation history. The operator.add annotation is the reducer hint:
    # presumably LangGraph concatenates each node's returned messages onto
    # the existing sequence rather than replacing it -- verify against the
    # LangGraph version in use.
    messages: Annotated[Sequence[BaseMessage], operator.add]
57
 
58
+
59
def create_langgraph_agent():
    """Build and compile the tool-using LangGraph agent.

    The graph has two nodes: "agent", which calls the chat model with the
    tools bound and the system prompt prepended, and "tools", a ToolNode
    over the same tool list. Routing out of "agent" is decided by
    tools_condition, and "tools" always hands control back to "agent".

    Returns:
        The compiled graph produced by StateGraph.compile().
    """
    print("Initializing Advanced LangGraph Agent with vision…")

    # The prompt instructs the model to follow formatting rules, so the rules
    # themselves have to be part of the prompt; without them the instruction
    # refers to nothing the model can see.
    SYSTEM_PROMPT = """
You are a general AI assistant for the GAIA test. I will ask you a question. Report your reasoning briefly, and finish with:
FINAL ANSWER: [YOUR FINAL ANSWER]
Follow these formatting rules strictly:
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
"""

    llm = ChatOpenAI(model="gpt-4o", temperature=0)

    tools = [
        TavilySearchResults(max_results=3),
        PythonREPLTool(),
        YouTubeSearchTool(),
        image_analysis,  # vision tool
    ]

    # Optional FileManagement tools: still best-effort, but a failure is
    # reported instead of being silently discarded.
    try:
        from langchain_community.agent_toolkits.file_management.toolkit import FileManagementToolkit
        tools.extend(FileManagementToolkit(root_dir=".").get_tools())
    except Exception as exc:
        print("FileManagement toolkit unavailable:", exc)

    llm_with_tools = llm.bind_tools(tools)

    def agent_node(state):
        """Call the model with the system prompt placed before the history."""
        msgs = [SystemMessage(content=SYSTEM_PROMPT)] + list(state["messages"])
        reply = llm_with_tools.invoke(msgs)
        # Returned as a one-element list under the "messages" state key.
        return {"messages": [reply]}

    graph = StateGraph(AgentState)
    graph.add_node("agent", agent_node)
    graph.add_node("tools", ToolNode(tools))
    graph.set_entry_point("agent")
    graph.add_conditional_edges("agent", tools_condition)
    graph.add_edge("tools", "agent")

    return graph.compile()
 
 
99
 
100
+ # NOTE: run_agent, run_and_submit_all and the Gradio UI are not included below this point; they must be carried over unchanged from the previous version of app.py.