Final_Assignment_Template

Sleeping

App Files Files Community

JabrilJacobs committed on May 24, 2025

Commit

2a76391

verified ·

1 Parent(s): 482af65

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -10

app.py CHANGED Viewed

@@ -68,15 +68,6 @@ class NewAgent:
                 If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
                 If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
                                    )
-            # sys_msg = SystemMessage(
-            #     content=f"""
-            #     You are a general AI assistant. I will ask you a question.
-            #     Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
-            #     YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-            #     If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
-            #     If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-            #     If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
-            #                        )
             return {
                 "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])],
             }
@@ -100,6 +91,128 @@ class NewAgent:
         messages = [HumanMessage(content=question)]
         response = alfred.invoke({"messages": messages})
         return response['messages'][-1].content
 def run_and_submit_all( profile: gr.OAuthProfile | None):
@@ -124,7 +237,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
         # agent = BasicAgent()
-        agent = NewAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None

                 If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
                 If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
                                    )
             return {
                 "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])],
             }
         messages = [HumanMessage(content=question)]
         response = alfred.invoke({"messages": messages})
         return response['messages'][-1].content
class NewAgent2:
    """Question-answering agent that accepts text with an optional image.

    A question may be a plain string or a dictionary carrying the question
    text and image data. The agent wraps a tool-calling chat model in a
    two-node graph (``assistant`` <-> ``tools``) and returns the content of
    the last message the graph produces.
    """

    # Keys probed, in priority order, when the question arrives as a dict.
    _TEXT_KEYS = ("text", "question")
    _IMAGE_KEYS = ("image", "image_url")

    # Prefixes that mark image data as already being a usable URL.
    _URL_PREFIXES = ("http://", "https://", "data:image")

    # System prompt sent ahead of every conversation. The text (including its
    # leading whitespace) is kept verbatim; it needs no interpolation, so it
    # is a plain string rather than an f-string.
    _SYSTEM_PROMPT = """
                You are a general AI assistant. I will ask you a question that may include text and/or images.
                If you cannot find an answer, you may report your thoughts.
                If you find an answer, your response should only contain your final answer. Report nothing before or after this answer.
                YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
                If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
                If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
                If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
                If an image is provided, analyze it carefully and answer based on what you see in the image.
                """

    def __init__(self):
        # The graph is built on first use so that constructing the agent
        # stays cheap and free of network/client set-up.
        self._graph = None
        print("NewAgent2 initialized.")

    @staticmethod
    def _first_present(mapping: Dict[str, Any], keys: tuple) -> Any:
        """Return the first value in *mapping* under *keys* that is neither
        ``None`` nor empty, or ``None`` when there is no such value."""
        for key in keys:
            value = mapping.get(key)
            if value is not None and value != "":
                return value
        return None

    def _process_question_input(self, question_input: Union[str, Dict[str, Any]]) -> tuple:
        """Split a question payload into ``(text, image_data)``.

        Args:
            question_input: Either the question as a plain string, or a
                dictionary holding the text under ``'text'``/``'question'``
                and, optionally, an image under ``'image'``/``'image_url'``.

        Returns:
            A ``(text, image_data)`` tuple. ``text`` is always a ``str``
            (empty when the dictionary carries none); ``image_data`` is
            ``None`` when no image was supplied.
        """
        if isinstance(question_input, str):
            return question_input, None

        if isinstance(question_input, dict):
            text = self._first_present(question_input, self._TEXT_KEYS)
            image_data = self._first_present(question_input, self._IMAGE_KEYS)
            # Callers slice and log the text, so it must be a real string
            # even when the payload holds ``None`` or a non-string value.
            if text is None:
                text = ""
            elif not isinstance(text, str):
                text = str(text)
            return text, image_data

        # Anything else (numbers, custom objects, ...) is treated as text.
        return str(question_input), None

    def _create_message_content(
        self, text: str, image_data: Union[str, bytes, None] = None
    ) -> Union[str, list]:
        """Build the content of the human message.

        Args:
            text: The question text.
            image_data: Optional image, given as an ``http(s)`` URL, a
                ``data:image`` URL, a bare base64 string, or raw bytes.
                Bare base64 and raw bytes are wrapped in a JPEG data URL.

        Returns:
            ``text`` unchanged when there is no image; otherwise a list with
            one ``text`` part and one ``image_url`` part.

        Raises:
            TypeError: If ``image_data`` is neither ``str`` nor bytes-like.
        """
        if not image_data:
            return text

        if isinstance(image_data, (bytes, bytearray)):
            import base64  # local import: only needed for raw-bytes input

            encoded = base64.b64encode(bytes(image_data)).decode("ascii")
            image_url = f"data:image/jpeg;base64,{encoded}"
        elif isinstance(image_data, str):
            candidate = image_data.strip()
            if not candidate:
                # Whitespace only: there is no image to attach.
                return text
            if candidate.lower().startswith(self._URL_PREFIXES):
                # Regular URLs and data URLs are passed through as they are.
                image_url = candidate
            else:
                # Assume it's base64 encoded image data
                image_url = f"data:image/jpeg;base64,{candidate}"
        else:
            raise TypeError(
                "image_data must be a str or bytes-like object, "
                f"got {type(image_data).__name__}"
            )

        return [
            {"type": "text", "text": text},
            {"type": "image_url", "image_url": {"url": image_url}},
        ]

    def _build_graph(self):
        """Create the tools and model, then compile the assistant/tools graph."""
        # Initialize the web search tool
        search_tool = DuckDuckGoSearchRun()
        # Initialize the Hub stats tool
        hub_stats_tool = Tool(
            name="get_hub_stats",
            func=get_hub_stats,
            description="Fetches the most downloaded model from a specific author on the Hugging Face Hub."
        )
        tools = [
            search_tool,
            hub_stats_tool,
        ]
        # Use a vision-capable model
        llm = ChatOpenAI(model="gpt-4o")
        llm_with_tools = llm.bind_tools(tools, parallel_tool_calls=False)
        system_prompt = self._SYSTEM_PROMPT

        class AgentState(TypedDict):
            messages: Annotated[list[AnyMessage], add_messages]

        def assistant(state: AgentState):
            # The system message is prepended on every model turn; it is not
            # stored in the graph state.
            sys_msg = SystemMessage(content=system_prompt)
            return {
                "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])],
            }

        builder = StateGraph(AgentState)
        # Define nodes: these do the work
        builder.add_node("assistant", assistant)
        builder.add_node("tools", ToolNode(tools))
        # Define edges: these determine how the control flow moves
        builder.add_edge(START, "assistant")
        builder.add_conditional_edges(
            "assistant",
            # If the latest message requires a tool, route to tools
            # Otherwise, provide a direct response
            tools_condition,
        )
        builder.add_edge("tools", "assistant")
        return builder.compile()

    def __call__(self, question: Union[str, Dict[str, Any]]) -> str:
        """Answer *question* and return the content of the final message.

        Args:
            question: A plain question string, or a dictionary with the
                question text and optional image data.

        Returns:
            The ``content`` of the last message produced by the graph.
        """
        print(f"Agent received question input: {str(question)[:100]}...")
        # Process the input to extract text and image
        question_text, image_data = self._process_question_input(question)
        print(f"Extracted text: {question_text[:50]}...")
        print(f"Image data present: {image_data is not None}")

        # Build the graph once and reuse it for later questions. getattr
        # keeps this working for instances created without __init__.
        graph = getattr(self, "_graph", None)
        if graph is None:
            graph = self._build_graph()
            self._graph = graph

        # Create the human message with proper content format
        message_content = self._create_message_content(question_text, image_data)
        messages = [HumanMessage(content=message_content)]
        response = graph.invoke({"messages": messages})
        return response['messages'][-1].content
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
         # agent = BasicAgent()
+        # agent = NewAgent()
+        agent = NewAgent2()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None