Final_Assignment_Template

Sleeping

ameglei-external committed on May 13, 2025

Commit

7cf180d

verified ·

1 Parent(s): a88165c

Fix errors in audio and image tools

Files changed (1) hide show

app.py CHANGED Viewed

@@ -184,7 +184,7 @@ class BasicAgent:
         # img.save(img_bytes, format=img.format)
         # img_bytes.seek(0)
-        with open("photo.png","rb") as f:
             b64 = b64encode(f.read()).decode()
         vision = ChatOpenAI(model="gpt-4o-vision", temperature=0)
@@ -201,7 +201,7 @@ class BasicAgent:
     @tool(
         description="Transcribe an audio file with Whisper and answer a question about its content."
     )
-    def audio_qa_tool(path: str, question: str, max_chars: int = 2048) -> str:
         """
         Args:
           path: Local filesystem path to an audio file (mp3, wav, etc.).
@@ -214,24 +214,23 @@ class BasicAgent:
             return f"Error: file not found at {path}"
         print("File metadata:", os.stat(path))
-        audio = AudioSegment.from_file(path)
-        tmp_path = os.path.join(tempfile.gettempdir(), "tmp_audio.wav")
-        audio.export(tmp_path, format="wav")
-        model = whisper.load_model("base")
-        result = model.transcribe(tmp_path)
-        transcript = result.get("text", "")
-        prompt = f"""Here is the transcript of an audio file:
-        {transcript}
-        Question: {question}
-        Please answer briefly based on this transcript, and give only the answer."""
-        response = model(completion_kwargs={"max_tokens": 200})(prompt)
-        answer = response.choices[0].text.strip()
         return answer[:max_chars]

         # img.save(img_bytes, format=img.format)
         # img_bytes.seek(0)
+        with open(path, "rb") as f:
             b64 = b64encode(f.read()).decode()
         vision = ChatOpenAI(model="gpt-4o-vision", temperature=0)
     @tool(
         description="Transcribe an audio file with Whisper and answer a question about its content."
     )
+    def audio_qa_tool(path: str, question: str, max_chars: int = 10000) -> str:
         """
         Args:
           path: Local filesystem path to an audio file (mp3, wav, etc.).
             return f"Error: file not found at {path}"
         print("File metadata:", os.stat(path))
+        with open(path, "rb") as audio_file:
+            transcription = openai.Audio.transcriptions.create(
+                file=audio_file,
+                model="whisper-1"
+            )
+        transcript = transcription.text
+        prompt = f"""
+            Here is a transcript of an audio file:
+            '''{transcript}'''
+            Question: '''{question}'''
+            Please answer briefly based on this transcript, and give only the answer.
+        """
+        response = model.invoke([{"role": "user", "content": prompt}])
+        answer = response.content.strip()
         return answer[:max_chars]