Final_Assignment_Template

Build error

App Files Files Community

jc7k committed on May 13, 2025

Commit

6ac7b51

1 Parent(s): 199dce0

Updated input file fetching and tool handling for images, audio, and Python code

Browse files

Files changed (2) hide show

app.py +93 -41
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import os
 import gradio as gr
 import requests
 import inspect
 import yaml
 import pandas as pd
 from smolagents import (
     OpenAIServerModel,
     ToolCallingAgent,
@@ -12,7 +15,9 @@ from smolagents import (
     DuckDuckGoSearchTool,
     WebSearchTool,
     VisitWebpageTool,
-    SpeechToTextTool
 )
 from dotenv import load_dotenv
 # Load environment variables from .env file
@@ -44,20 +49,29 @@ class BasicAgent:
             description="This agent can search the web and visit webpages to gather information.",
         )
-        # stt_agent = ToolCallingAgent(
-        #     verbosity_level=1,
-        #     tools=[SpeechToTextTool()],
-        #     max_steps=5,
-        #     model=model,
-        #     name="speech_to_text_agent",
-        #     description="This agent can transcribe audio files to text.",
-        # )
         manager_agent = CodeAgent(
             tools=[],
             model=model,
-            # managed_agents=[web_agent, stt_agent],
-            managed_agents=[web_agent],
             additional_authorized_imports=["time", "numpy", "pandas"],
         )
@@ -65,11 +79,27 @@ class BasicAgent:
         print(f"Agent initialized with model ID: {os.environ['MODEL_ID']}")
         print(f"Agent initialized with tools: {self.agent.tools}")
-    def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         system_prompt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer as a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. Here is the question: "
-        answer = self.agent.run(system_prompt + question)
         if answer:
             print(f"Agent returning answer: {answer}")
             return answer
@@ -137,39 +167,61 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
-        # file_name = item.get("file_name")
-        # if file_name:
-        #     print(f"Fetching file content for task ID: {task_id}")
-        #     try:
-        #         file_url = f"{api_url}/files/{task_id}"
-        #         file_response = requests.get(file_url, timeout=15)
-        #         file_response.raise_for_status()
-        #         # Save the MP3 file temporarily
-        #         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
-        #             temp_file.write(file_response.content)
-        #             temp_file_path = temp_file.name
-        #             print(f"MP3 file saved at: {temp_file_path}")
-        #         # Use SpeechToTextTool to process the MP3 file
-        #         speech_to_text_tool = SpeechToTextTool()
-        #         transcription = speech_to_text_tool.run(temp_file_path)
-        #         print(f"Transcription for task ID {task_id}: {transcription}")
-        #         # Clean up the temporary file
-        #         os.remove(temp_file_path)
-        #     except requests.exceptions.RequestException as e:
-        #         print(f"Error fetching file for task ID {task_id}: {e}")
-        #         continue
-        #     except Exception as e:
-        #         print(f"Error processing MP3 file for task ID {task_id}: {e}")
-        #         continue
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:

 import os
 import gradio as gr
 import requests
+from io import BytesIO
+from PIL import Image
 import inspect
 import yaml
 import pandas as pd
 from smolagents import (
     OpenAIServerModel,
     ToolCallingAgent,
     DuckDuckGoSearchTool,
     WebSearchTool,
     VisitWebpageTool,
+    SpeechToTextTool,
+    AgentAudio,
+    PythonInterpreterTool,
 )
 from dotenv import load_dotenv
 # Load environment variables from .env file
             description="This agent can search the web and visit webpages to gather information.",
         )
+        python_agent = ToolCallingAgent(
+            verbosity_level=1,
+            tools=[PythonInterpreterTool()],
+            max_steps=5,
+            model=model,
+            name="python_agent",
+            description="This agent can run Python code snippets.",
+        )
+        stt_agent = ToolCallingAgent(
+            verbosity_level=1,
+            tools=[SpeechToTextTool()],
+            max_steps=5,
+            model=model,
+            name="speech_to_text_agent",
+            description="This agent can transcribe audio files to text.",
+        )
         manager_agent = CodeAgent(
             tools=[],
             model=model,
+            managed_agents=[web_agent, stt_agent, python_agent],
+            # managed_agents=[web_agent],
             additional_authorized_imports=["time", "numpy", "pandas"],
         )
         print(f"Agent initialized with model ID: {os.environ['MODEL_ID']}")
         print(f"Agent initialized with tools: {self.agent.tools}")
+    def __call__(self, question: str, file_name: str, file_type: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         system_prompt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer as a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. Here is the question: "
+        if file_type == "image":
+            # If the file is an image, read file_name and convert it to a PIL Image
+            image = Image.open(file_name)
+            image = image.convert("RGB")
+            # Convert the image to bytes
+            image_bytes = BytesIO()
+            answer = self.agent.run(system_prompt + question, images=[image_bytes])
+        elif file_type == "audio":
+            arguments = {"audio": file_name}
+            answer = self.agent.run(system_prompt + question, arguments=arguments)
+        elif file_type == "python":
+            with open(file_name, "r") as file:
+                python_code = file.read()
+            answer = self.agent.run(system_prompt + question, code=python_code)
+        else:
+            answer = self.agent.run(system_prompt + question)
         if answer:
             print(f"Agent returning answer: {answer}")
             return answer
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
+        file_type = "unknown"
+        if file_name:
+            print(f"Fetching file content for task ID: {task_id}")
+            try:
+                file_url = f"{api_url}/files/{task_id}"
+                file_response = requests.get(file_url, timeout=15)
+                file_response.raise_for_status()
+                # parse the file extension for the file name to see if it is an image, audio, or python file
+                file_extension = os.path.splitext(file_name)[1].lower()
+                if file_extension in ['.jpg', '.jpeg', '.png', '.gif']:
+                    # If the file is an image, convert it to a PIL Image
+                    file_type = "image"
+                    question_text = f"Here is an image: {file_name}. Please describe it."
+                    # Save the image to a local file
+                    with open(file_name, "wb") as image_file:
+                        image_file.write(file_response.content)
+                        print(f"Saved image file: {file_name}")
+                elif file_extension in ['.wav', '.mp3', '.ogg']:
+                    # If the file is an audio file, convert it to text
+                    file_type = "audio"
+                    audio_data = file_response.content
+                    question_text = f"Here is an audio file: {file_name}. Please transcribe it."
+                    # Save the audio to a local file
+                    with open(file_name, "wb") as audio_file:
+                        audio_file.write(file_response.content)
+                        print(f"Saved audio file: {file_name}")
+                elif file_extension in ['.py']:
+                    # If the file is a Python file, you might want to run it or analyze it
+                    file_type = "python"
+                    question_text = f"Here is a Python file: {file_name}. Please analyze it."
+                    # Save the Python file to a local file
+                    with open(file_name, "wb") as python_file:
+                        python_file.write(file_response.content)
+                        print(f"Saved Python file: {file_name}")
+            except requests.exceptions.HTTPError as e:
+                print(f"Error fetching file for task ID {task_id}: {e}")
+                continue
+            except requests.exceptions.RequestException as e:
+                print(f"Error fetching file for task ID {task_id}: {e}")
+                continue
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = agent(question_text, file_name, file_type)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 gradio
 requests
 smolagents
-smolagents[openai]

 gradio
 requests
 smolagents
+smolagents[openai]
+smolagents[audio]
+transformers