Final_Assignment_Template

Sleeping

App Files Files Community

kostasang committed on Jun 29, 2025

Commit

7a55fbb

verified ·

1 Parent(s): 850a5e4

Upload 2 files

Browse files

Files changed (2) hide show

src/agent.py +19 -12
src/tools.py +20 -1

src/agent.py CHANGED Viewed

@@ -2,6 +2,7 @@ import base64
 import json
 from os.path import join
 from langchain_core.messages import SystemMessage, HumanMessage
 from langchain_core.rate_limiters import InMemoryRateLimiter
 from langchain_openai.chat_models import ChatOpenAI
@@ -130,7 +131,7 @@ class Agent:
         :param question_file: The file that comes with the question.
         :return: Formatted HumanMessage.
         """
-        if question_file is None:
             human_message = HumanMessage(content=question)
         else:
             if '.png' in question_file:
@@ -152,21 +153,15 @@ class Agent:
                     ]
                 )
             elif '.mp3' in question_file:
-                with open(join(self.data_path, question_file), "rb") as file:
-                    file_content = base64.b64encode(file.read()).\
-                        decode("utf-8")
                 human_message = HumanMessage(
                     content=[
                         {
                             'type': 'text',
-                            'text': question
                         },
-                        {
-                            'type': 'audio',
-                            'source_type': 'base64',
-                            'data': file_content,
-                            'mime_type': 'audio/mp3'
-                        }
                     ]
                 )
             elif '.py' in question_file:
@@ -182,7 +177,19 @@ class Agent:
                         ]
                     )
             elif '.xlsx' in question_file:
-                human_message = HumanMessage(content=question)
         return human_message

 import json
 from os.path import join
+import pandas as pd
 from langchain_core.messages import SystemMessage, HumanMessage
 from langchain_core.rate_limiters import InMemoryRateLimiter
 from langchain_openai.chat_models import ChatOpenAI
         :param question_file: The file that comes with the question.
         :return: Formatted HumanMessage.
         """
+        if question_file is None or question_file == '':
             human_message = HumanMessage(content=question)
         else:
             if '.png' in question_file:
                     ]
                 )
             elif '.mp3' in question_file:
+                # There is no support for audio files when using gpt-4o.
+                # So, I will use a tool to transcribe the .mp3 file to text.
                 human_message = HumanMessage(
                     content=[
                         {
                             'type': 'text',
+                            'text': f'''{question}\n\nHere is the audio file:
+                            ```audio\n{question_file}\n```'''
                         },
                     ]
                 )
             elif '.py' in question_file:
                         ]
                     )
             elif '.xlsx' in question_file:
+                data = pd.read_excel(
+                    join(self.data_path, question_file),
+                )
+                data = data.to_string()
+                human_message = HumanMessage(
+                    content=[
+                        {
+                            'type': 'text',
+                            'text': f'''{question}\n\nHere is the data:
+                            ```\n{data}\n```'''
+                        },
+                    ]
+                )
         return human_message

src/tools.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import json
-from langchain_community.document_loaders import WikipediaLoader
 from langchain_core.tools import tool
@@ -86,3 +88,20 @@ def wiki_search(query: str) -> str:
         for doc in search_docs
     ]
     return json.dumps({"wiki_results": formatted_search_docs})

 import json
+from langchain_community.document_loaders import \
+    AssemblyAIAudioTranscriptLoader, WikipediaLoader
 from langchain_core.tools import tool
         for doc in search_docs
     ]
     return json.dumps({"wiki_results": formatted_search_docs})
+@tool
+def audio_transcript(
+    audiofile_path: str
+) -> str:
+    """
+    Transcribe an audio file to text.
+    Args:
+        audiofile_path: The path to the audio file to transcribe.
+    """
+    loader = AssemblyAIAudioTranscriptLoader(
+        file_path=audiofile_path,
+    )
+    docs = loader.load(file_path=audiofile_path)
+    return docs[0].page_content if docs else "No transcription available."