Agent_Course_Final_Assignment

Sleeping

App Files Files Community

giulia-fontanella committed on Jun 5, 2025

Commit

2d91d8b

verified ·

1 Parent(s): 88aac3f

Update tools.py

Browse files

Files changed (1) hide show

tools.py +55 -0

tools.py CHANGED Viewed

@@ -155,7 +155,62 @@ class DescribeImage:
             error_msg = f"Error describing image: {str(e)}"
             print(error_msg)
             return ""
 @tool
 def wiki_search(query: str) -> str:

             error_msg = f"Error describing image: {str(e)}"
             print(error_msg)
             return ""
+class TranscribeAudio:
+    def __init__(self, audio_llm: Runnable):
+        """
+        Initialize with a LangChain-compatible vision+audio GPT-4o model.
+        Args:
+            audio_llm: A LangChain Runnable for GPT-4o (must support audio inputs).
+        """
+        self.audio_llm = audio_llm
+    def __call__(self, audio_path: str) -> str:
+        """
+        Transcribe an MP3 file.
+        Args:
+            audio_path: Path to the MP3 audio file.
+        Returns:
+            Transcribed text as a string.
+        """
+        try:
+            with open(audio_path, "rb") as audio_file:
+                audio_bytes = audio_file.read()
+            audio_data = AudioFile(
+                mime_type="audio/mpeg",  # MP3 MIME type
+                data=audio_bytes
+            )
+            message = [
+                HumanMessage(
+                    content=[
+                        {
+                            "type": "text",
+                            "text": (
+                                "Transcribe the speech from this audio file. "
+                                "Return only the transcribed text, with no extra commentary."
+                            ),
+                        },
+                        {
+                            "type": "audio",
+                            "audio": audio_data,
+                        },
+                    ]
+                )
+            ]
+            response = self.audio_llm.invoke(message)
+            return response.content.strip()
+        except Exception as e:
+            error_msg = f"Error transcribing audio: {str(e)}"
+            print(error_msg)
+            return ""
 @tool
 def wiki_search(query: str) -> str: