AgenticAI_Finals

Paused

App Files Files Community

Marek Stoj committed on Jun 30, 2025

Commit

b5cd936

1 Parent(s): 42e22b5

Working on LangGraph Agent.

Browse files

Files changed (2) hide show

agent_langgraph.py +85 -9
app_local.py +6 -6

agent_langgraph.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import re
-from typing import TypedDict, Annotated, Optional, cast
 from langchain_core.utils.function_calling import convert_to_openai_tool
 from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
@@ -18,9 +19,13 @@ from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_community.tools.tavily_search import TavilySearchResults
 import requests
-# MODEL_NAME = "gpt-4.1-mini"
 # MODEL_NAME = "gpt-4o"
-MODEL_NAME = "o4-mini"
 SYSTEM_PROMPT = """\
 You are a general AI assistant.
@@ -44,12 +49,13 @@ class AgentState(TypedDict):
 class BasicAgent:
   def __init__(self):
-    llm = ChatOpenAI(model=MODEL_NAME)
     tools = [
         # DuckDuckGoSearchRun(),
         TavilySearchResults(
             tavily_api_key="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"),
     ]
     self.llm_with_tools = llm.bind_tools(tools)
@@ -70,6 +76,8 @@ class BasicAgent:
     self.agent = state_graph.compile()
     print("LangGraphAgent initialized.")
   async def __call__(self, question_item: dict) -> str:
@@ -90,21 +98,35 @@ class BasicAgent:
     prompt = f"My question:\n{question}"
-    if file_name and is_plain_text_file(file_name):
       file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
       response = requests.get(file_url)
       response.raise_for_status()
-      file_content = response.text
-      prompt += f"Attached file name: {file_name}\n"
-      prompt += f"Attached file content:\n{file_content}\n"
     input_messages: list[AnyMessage] = [HumanMessage(content=prompt)]
     messages = self.agent.invoke(
         {
             "messages": input_messages,
-            "file_name": None
         }
     )
@@ -143,6 +165,53 @@ class BasicAgent:
     return {"messages": [self.llm_with_tools.invoke([sys_msg] + state["messages"])], "file_name": state["file_name"]}
 def is_plain_text_file(file_name: str) -> bool:
   plain_text_extensions = {'.txt', '.py', '.md', '.json',
@@ -151,6 +220,13 @@ def is_plain_text_file(file_name: str) -> bool:
   return ext in plain_text_extensions
 def ground_truth_answer(question: str) -> str:
   """
   Returns the answer corresponding to the given question,

+import base64
 import os
 import re
+from typing import Tuple, TypedDict, Annotated, Optional, cast
 from langchain_core.utils.function_calling import convert_to_openai_tool
 from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
 from langchain_community.tools.tavily_search import TavilySearchResults
 import requests
+MODEL_NAME = "gpt-4.1-mini"
 # MODEL_NAME = "gpt-4o"
+# MODEL_NAME = "o4-mini"
+# VISION_MODEL_NAME = "gpt-4o"
+VISION_MODEL_NAME = "gpt-4.1-mini"
+# VISION_MODEL_NAME = "o4-mini"
 SYSTEM_PROMPT = """\
 You are a general AI assistant.
 class BasicAgent:
   def __init__(self):
+    llm = ChatOpenAI(model=MODEL_NAME, verbose=True)
     tools = [
         # DuckDuckGoSearchRun(),
         TavilySearchResults(
             tavily_api_key="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"),
+        self.analyze_image,
     ]
     self.llm_with_tools = llm.bind_tools(tools)
     self.agent = state_graph.compile()
+    self.vision_llm = ChatOpenAI(model=VISION_MODEL_NAME, verbose=True)
     print("LangGraphAgent initialized.")
   async def __call__(self, question_item: dict) -> str:
     prompt = f"My question:\n{question}"
+    if file_name:
       file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
       response = requests.get(file_url)
       response.raise_for_status()
+      if is_plain_text_file(file_name):
+        file_content = response.text
+        prompt += f"\nAttached file name: {file_name}\n"
+        prompt += f"Attached file content:\n{file_content}\n"
+      else:
+        is_image, mime_type = is_image_file(file_name)
+        if is_image:
+          print("Content length:", len(response.content))
+          image_data = base64.b64encode(response.content).decode("utf-8")
+          # write the image data to a file
+          with open("dupa-jasia.png", "wb") as f:
+            f.write(response.content)
+          with open("pierdzi-stasia.png.base64", "w") as f:
+            f.write(image_data)
+          prompt += f"\nImage file name: {file_name}\n"
+          prompt += f"Image file data:\n{image_data}\n"
+          prompt += f"Image file image mime type: {mime_type}\n"
     input_messages: list[AnyMessage] = [HumanMessage(content=prompt)]
     messages = self.agent.invoke(
         {
             "messages": input_messages,
+            "file_name": file_name
         }
     )
     return {"messages": [self.llm_with_tools.invoke([sys_msg] + state["messages"])], "file_name": state["file_name"]}
+  def analyze_image(self, image_data: str, mime_type: str) -> str:
+    """
+    Analyze an image file using a multimodal model.
+    Args:
+        image_data: A base64-encoded image file data (string).
+        mime_type: The MIME type of the image (e.g., "image/png", "image/jpeg").
+    Returns:
+        A detailed analysis of the image content.
+    """
+    all_text = ""
+    try:
+      message = [
+          HumanMessage(
+              content=[
+                  {
+                      "type": "text",
+                      "text": (
+                          "Analyze the image content, in detail. "
+                          "Return detailed analysis."
+                      ),
+                  },
+                  {
+                      "type": "image_url",
+                      "image_url": {
+                          "url": f"data:{mime_type};base64,{image_data}"
+                      },
+                  },
+              ]
+          )
+      ]
+      response = self.vision_llm.invoke(message)
+      print(response)
+      all_text += str(response.content)
+      return all_text.strip()
+    except Exception as e:
+      error_msg = f"Error analyzing image: {str(e)}"
+      print(error_msg)
+      return ""
 def is_plain_text_file(file_name: str) -> bool:
   plain_text_extensions = {'.txt', '.py', '.md', '.json',
   return ext in plain_text_extensions
def is_image_file(file_name: str) -> Tuple[bool, str]:
  """
  Decide whether a file name denotes a supported image and report its MIME type.

  Args:
      file_name: File name or path; only its extension is inspected
          (case-insensitively).
  Returns:
      A `(is_image, mime_type)` tuple. `is_image` is True for .png, .jpg,
      .jpeg, .gif, .bmp and .tiff. For those, `mime_type` is the registered
      media type (so both .jpg and .jpeg give "image/jpeg"). For any other
      extension `mime_type` is "image/<extension without the dot>" and should
      be ignored by callers.
  """
  # Explicit mapping rather than "image/" + extension: the extension is not
  # always the media subtype (".jpg" must map to "image/jpeg").
  mime_types = {
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.gif': 'image/gif',
      '.bmp': 'image/bmp',
      '.tiff': 'image/tiff',
  }
  ext = os.path.splitext(file_name)[1].lower()
  # Fallback keeps the previous return shape for non-image extensions.
  mime_type = mime_types.get(ext, f"image/{ext[1:]}")
  return (ext in mime_types, mime_type)
 def ground_truth_answer(question: str) -> str:
   """
   Returns the answer corresponding to the given question,

app_local.py CHANGED Viewed

@@ -21,9 +21,9 @@ from agent_langgraph import BasicAgent
 # file_name = None
 # # Answer: Rd5
-# task_id="cca530fc-4052-43b2-b130-b30968d8aa44"
-# question="Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation."
-# file_name="cca530fc-4052-43b2-b130-b30968d8aa44.png"
 # # Answer: FunkMonk
 # task_id="4fc2f1ae-8625-45b5-ab34-ad4433bc21f8"
@@ -61,9 +61,9 @@ from agent_langgraph import BasicAgent
 # file_name=None
 # Answer: 0
-task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
-question = "What is the final numeric output from the attached Python code?"
-file_name = "f918266a-b3e0-4914-865d-4faa564f1aef.py"
 # # Answer: 519
 # task_id="3f57289b-8c60-48be-bd80-01f8099ca449"

 # file_name = None
 # # Answer: Rd5
+task_id="cca530fc-4052-43b2-b130-b30968d8aa44"
+question="Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation."
+file_name="cca530fc-4052-43b2-b130-b30968d8aa44.png"
 # # Answer: FunkMonk
 # task_id="4fc2f1ae-8625-45b5-ab34-ad4433bc21f8"
 # file_name=None
 # Answer: 0
+# task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
+# question = "What is the final numeric output from the attached Python code?"
+# file_name = "f918266a-b3e0-4914-865d-4faa564f1aef.py"
 # # Answer: 519
 # task_id="3f57289b-8c60-48be-bd80-01f8099ca449"