Marek Stoj committed on
Commit
b5cd936
·
1 Parent(s): 42e22b5

Working on LangGraph Agent.

Browse files
Files changed (2) hide show
  1. agent_langgraph.py +85 -9
  2. app_local.py +6 -6
agent_langgraph.py CHANGED
@@ -1,6 +1,7 @@
 
1
  import os
2
  import re
3
- from typing import TypedDict, Annotated, Optional, cast
4
 
5
  from langchain_core.utils.function_calling import convert_to_openai_tool
6
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
@@ -18,9 +19,13 @@ from langchain_community.tools import DuckDuckGoSearchRun
18
  from langchain_community.tools.tavily_search import TavilySearchResults
19
  import requests
20
 
21
- # MODEL_NAME = "gpt-4.1-mini"
22
  # MODEL_NAME = "gpt-4o"
23
- MODEL_NAME = "o4-mini"
 
 
 
 
24
 
25
  SYSTEM_PROMPT = """\
26
  You are a general AI assistant.
@@ -44,12 +49,13 @@ class AgentState(TypedDict):
44
 
45
  class BasicAgent:
46
  def __init__(self):
47
- llm = ChatOpenAI(model=MODEL_NAME)
48
 
49
  tools = [
50
  # DuckDuckGoSearchRun(),
51
  TavilySearchResults(
52
  tavily_api_key="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"),
 
53
  ]
54
 
55
  self.llm_with_tools = llm.bind_tools(tools)
@@ -70,6 +76,8 @@ class BasicAgent:
70
 
71
  self.agent = state_graph.compile()
72
 
 
 
73
  print("LangGraphAgent initialized.")
74
 
75
  async def __call__(self, question_item: dict) -> str:
@@ -90,21 +98,35 @@ class BasicAgent:
90
 
91
  prompt = f"My question:\n{question}"
92
 
93
- if file_name and is_plain_text_file(file_name):
94
  file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
95
  response = requests.get(file_url)
96
  response.raise_for_status()
97
- file_content = response.text
98
 
99
- prompt += f"Attached file name: {file_name}\n"
100
- prompt += f"Attached file content:\n{file_content}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  input_messages: list[AnyMessage] = [HumanMessage(content=prompt)]
103
 
104
  messages = self.agent.invoke(
105
  {
106
  "messages": input_messages,
107
- "file_name": None
108
  }
109
  )
110
 
@@ -143,6 +165,53 @@ class BasicAgent:
143
 
144
  return {"messages": [self.llm_with_tools.invoke([sys_msg] + state["messages"])], "file_name": state["file_name"]}
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  def is_plain_text_file(file_name: str) -> bool:
148
  plain_text_extensions = {'.txt', '.py', '.md', '.json',
@@ -151,6 +220,13 @@ def is_plain_text_file(file_name: str) -> bool:
151
  return ext in plain_text_extensions
152
 
153
 
 
 
 
 
 
 
 
154
  def ground_truth_answer(question: str) -> str:
155
  """
156
  Returns the answer corresponding to the given question,
 
1
+ import base64
2
  import os
3
  import re
4
+ from typing import Tuple, TypedDict, Annotated, Optional, cast
5
 
6
  from langchain_core.utils.function_calling import convert_to_openai_tool
7
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
 
19
  from langchain_community.tools.tavily_search import TavilySearchResults
20
  import requests
21
 
22
+ MODEL_NAME = "gpt-4.1-mini"
23
  # MODEL_NAME = "gpt-4o"
24
+ # MODEL_NAME = "o4-mini"
25
+
26
+ # VISION_MODEL_NAME = "gpt-4o"
27
+ VISION_MODEL_NAME = "gpt-4.1-mini"
28
+ # VISION_MODEL_NAME = "o4-mini"
29
 
30
  SYSTEM_PROMPT = """\
31
  You are a general AI assistant.
 
49
 
50
  class BasicAgent:
51
  def __init__(self):
52
+ llm = ChatOpenAI(model=MODEL_NAME, verbose=True)
53
 
54
  tools = [
55
  # DuckDuckGoSearchRun(),
56
  TavilySearchResults(
57
  tavily_api_key="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"),
58
+ self.analyze_image,
59
  ]
60
 
61
  self.llm_with_tools = llm.bind_tools(tools)
 
76
 
77
  self.agent = state_graph.compile()
78
 
79
+ self.vision_llm = ChatOpenAI(model=VISION_MODEL_NAME, verbose=True)
80
+
81
  print("LangGraphAgent initialized.")
82
 
83
  async def __call__(self, question_item: dict) -> str:
 
98
 
99
  prompt = f"My question:\n{question}"
100
 
101
+ if file_name:
102
  file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
103
  response = requests.get(file_url)
104
  response.raise_for_status()
 
105
 
106
+ if is_plain_text_file(file_name):
107
+ file_content = response.text
108
+ prompt += f"\nAttached file name: {file_name}\n"
109
+ prompt += f"Attached file content:\n{file_content}\n"
110
+ else:
111
+ is_image, mime_type = is_image_file(file_name)
112
+ if is_image:
113
+ print("Content length:", len(response.content))
114
+ image_data = base64.b64encode(response.content).decode("utf-8")
115
+ # write the image data to a file
116
+ with open("dupa-jasia.png", "wb") as f:
117
+ f.write(response.content)
118
+ with open("pierdzi-stasia.png.base64", "w") as f:
119
+ f.write(image_data)
120
+ prompt += f"\nImage file name: {file_name}\n"
121
+ prompt += f"Image file data:\n{image_data}\n"
122
+ prompt += f"Image file image mime type: {mime_type}\n"
123
 
124
  input_messages: list[AnyMessage] = [HumanMessage(content=prompt)]
125
 
126
  messages = self.agent.invoke(
127
  {
128
  "messages": input_messages,
129
+ "file_name": file_name
130
  }
131
  )
132
 
 
165
 
166
  return {"messages": [self.llm_with_tools.invoke([sys_msg] + state["messages"])], "file_name": state["file_name"]}
167
 
168
def analyze_image(self, image_data: str, mime_type: str) -> str:
    """
    Analyze an image file using a multimodal model.

    Args:
        image_data: A base64-encoded image file data (string).
        mime_type: The MIME type of the image (e.g., "image/png", "image/jpeg").

    Returns:
        A detailed analysis of the image content. If the analysis fails,
        a message starting with "Error analyzing image:" is returned
        instead, so a failure is distinguishable from an empty analysis.
    """
    # This method is exposed as a tool, so it never raises: any failure is
    # reported back to the caller as text.
    try:
        # The image travels inline as a data URL next to the instruction text.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Analyze the image content, in detail. "
                            "Return detailed analysis."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_data}"
                        },
                    },
                ]
            )
        ]

        response = self.vision_llm.invoke(message)

        print(response)

        return str(response.content).strip()
    except Exception as e:
        error_msg = f"Error analyzing image: {str(e)}"
        print(error_msg)
        # Previously an empty string was returned here, which hid the
        # failure from the model that invoked the tool.
        return error_msg
214
+
215
 
216
  def is_plain_text_file(file_name: str) -> bool:
217
  plain_text_extensions = {'.txt', '.py', '.md', '.json',
 
220
  return ext in plain_text_extensions
221
 
222
 
223
def is_image_file(file_name: str) -> Tuple[bool, str]:
    """
    Tell whether a file name has a known image extension and give its MIME type.

    The extension is compared case-insensitively.

    Args:
        file_name: File name or path whose extension is inspected.

    Returns:
        A ``(is_image, mime_type)`` tuple. For recognized image extensions
        ``mime_type`` is the registered media type (``.jpg`` and ``.jpeg``
        both map to ``image/jpeg``). For anything else ``is_image`` is False
        and ``mime_type`` is ``image/`` followed by the extension without its
        dot, which callers should ignore.
    """
    # Explicit table: the extension is not always the MIME subtype
    # (".jpg" must become "image/jpeg", not "image/jpg").
    image_mime_types = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.tiff': 'image/tiff',
    }
    ext = os.path.splitext(file_name)[1].lower()
    mime_type = image_mime_types.get(ext)
    if mime_type is None:
        # Keep the historical placeholder value for non-image files.
        return (False, f"image/{ext[1:]}")
    return (True, mime_type)
228
+
229
+
230
  def ground_truth_answer(question: str) -> str:
231
  """
232
  Returns the answer corresponding to the given question,
app_local.py CHANGED
@@ -21,9 +21,9 @@ from agent_langgraph import BasicAgent
21
  # file_name = None
22
 
23
  # # Answer: Rd5
24
- # task_id="cca530fc-4052-43b2-b130-b30968d8aa44"
25
- # question="Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation."
26
- # file_name="cca530fc-4052-43b2-b130-b30968d8aa44.png"
27
 
28
  # # Answer: FunkMonk
29
  # task_id="4fc2f1ae-8625-45b5-ab34-ad4433bc21f8"
@@ -61,9 +61,9 @@ from agent_langgraph import BasicAgent
61
  # file_name=None
62
 
63
  # Answer: 0
64
- task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
65
- question = "What is the final numeric output from the attached Python code?"
66
- file_name = "f918266a-b3e0-4914-865d-4faa564f1aef.py"
67
 
68
  # # Answer: 519
69
  # task_id="3f57289b-8c60-48be-bd80-01f8099ca449"
 
21
  # file_name = None
22
 
23
  # # Answer: Rd5
24
+ task_id="cca530fc-4052-43b2-b130-b30968d8aa44"
25
+ question="Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation."
26
+ file_name="cca530fc-4052-43b2-b130-b30968d8aa44.png"
27
 
28
  # # Answer: FunkMonk
29
  # task_id="4fc2f1ae-8625-45b5-ab34-ad4433bc21f8"
 
61
  # file_name=None
62
 
63
  # Answer: 0
64
+ # task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
65
+ # question = "What is the final numeric output from the attached Python code?"
66
+ # file_name = "f918266a-b3e0-4914-865d-4faa564f1aef.py"
67
 
68
  # # Answer: 519
69
  # task_id="3f57289b-8c60-48be-bd80-01f8099ca449"