Final_Assignment_Template

Sleeping

App Files Files Community

ameglei-external committed on May 12, 2025

Commit

7ae8320

verified ·

1 Parent(s): 67bb955

Add new tools

Browse files

Files changed (1) hide show

app.py +82 -1

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import os
 from contextlib import suppress
 from pprint import pprint
 from typing import TypedDict, List, Dict, Any, Optional, Tuple
 from typing_extensions import Annotated
@@ -7,6 +9,9 @@ from typing_extensions import Annotated
 import gradio as gr
 import requests
 import inspect
 import pandas as pd
 from duckduckgo_search import DDGS
@@ -33,7 +38,10 @@ class BasicAgent:
         self.tools = [
             BasicAgent.search_tool,
             BasicAgent.find_local_files_tool,
-            BasicAgent.read_text_file_tool
         ]
         # Chat model with tool support
@@ -157,6 +165,79 @@ class BasicAgent:
         print(f"\nCalling read text file tool for", file_name)
         with open(file_name, 'r') as f:
             return f.read()
 def run_and_submit_all( profile: gr.OAuthProfile | None):

 import os
+import tempfile
 from contextlib import suppress
+from io import BytesIO
 from pprint import pprint
 from typing import TypedDict, List, Dict, Any, Optional, Tuple
 from typing_extensions import Annotated
 import gradio as gr
 import requests
 import inspect
+from PIL import Image
+from pydub import AudioSegment
+import whisper
 import pandas as pd
 from duckduckgo_search import DDGS
         self.tools = [
             BasicAgent.search_tool,
             BasicAgent.find_local_files_tool,
+            BasicAgent.read_text_file_tool,
+            BasicAgent.vision_tool,
+            BasicAgent.audio_qa_tool,
+            BasicAgent.excel_tool
         ]
         # Chat model with tool support
         print(f"\nCalling read text file tool for", file_name)
         with open(file_name, 'r') as f:
             return f.read()
+    @staticmethod
+    @tool(
+        description="Analyze an image file and answer a follow-up question about its content."
+    )
+    def vision_tool(path: str, question: str) -> str:
+        """
+        Args:
+          path: Path to a local image file.
+          question: What you want to know (e.g. 'How many people are in this photo?').
+        Returns:
+          The LLM’s answer based on the image content.
+        """
+        # Load & save as bytes so the vision model can consume it
+        img = Image.open(path)
+        img_bytes = BytesIO()
+        img.save(img_bytes, format=img.format)
+        img_bytes.seek(0)
+        vision = ChatOpenAI(model="gpt-4o-vision", temperature=0)
+        result = vision.analyze_image(img_bytes, question)
+        return result
+    @staticmethod
+    @tool(
+        description="Transcribe an audio file with Whisper and answer a question about its content."
+    )
+    def audio_qa_tool(path: str, question: str, max_chars: int = 2048) -> str:
+        """
+        Args:
+          path: Local filesystem path to an audio file (mp3, wav, etc.).
+          question: What to ask about the audio content.
+          max_chars: Maximum length of the returned answer.
+        Returns:
+          The LLM’s answer, based on the transcript (truncated if necessary).
+        """
+        if not os.path.exists(path):
+            return f"Error: file not found at {path}"
+        audio = AudioSegment.from_file(path)
+        tmp_path = os.path.join(tempfile.gettempdir(), "tmp_audio.wav")
+        audio.export(tmp_path, format="wav")
+        model = whisper.load_model("base")
+        result = model.transcribe(tmp_path)
+        transcript = result.get("text", "")
+        prompt = f"""Here is the transcript of an audio file:
+        {transcript}
+        Question: {question}
+        Please answer briefly based on this transcript, and give only the answer."""
+        response = self.model(completion_kwargs={"max_tokens": 200})(prompt)
+        answer = response.choices[0].text.strip()
+        return answer[:max_chars]
+    @staticmethod
+    @tool(
+        description="Load an Excel file and returns it's text representation."
+    )
+    def excel_tool(path: str) -> str:
+        """
+        Args:
+          path: Path to the .xlsx file.
+        Returns:
+          The string form of the content.
+        """
+        df = pd.read_excel(path)
+        return str(df.to_csv(index=False))
 def run_and_submit_all( profile: gr.OAuthProfile | None):