Change OpenAI API call for audio and vision tools
app.py
CHANGED
@@ -177,10 +177,18 @@ class BasicAgent:
 
         with open(path, "rb") as f:
             b64 = b64encode(f.read()).decode()
+
+        file_msg = {
+            "type": "file",
+            "file": {
+                "data": b64,
+                "filename": os.path.basename(path)
+            }
+        }
 
         msg = HumanMessage(content=[
             {"type":"text", "text": question},
-
+            file_msg
         ])
         response = model.invoke([SystemMessage(content="Analyze the image."), msg])
         result = response.content
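For context, a self-contained sketch of the pattern this hunk adopts: the file is base64-encoded and attached as an extra content block next to the text question. The wrapper function, the `ChatOpenAI` class, and the `gpt-4o` model name are assumptions for illustration; only the `file_msg` construction and the `invoke` call mirror the diff.

```python
# Sketch only: the hunk's pattern with assumed surrounding code.
import os
from base64 import b64encode

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI  # assumed; the diff only shows `model`


def analyze_file(path: str, question: str) -> str:
    model = ChatOpenAI(model="gpt-4o")  # assumed model name
    with open(path, "rb") as f:
        b64 = b64encode(f.read()).decode()

    # Same content-block shape the commit adds alongside the text prompt.
    file_msg = {
        "type": "file",
        "file": {
            "data": b64,
            "filename": os.path.basename(path),
        },
    }
    msg = HumanMessage(content=[
        {"type": "text", "text": question},
        file_msg,
    ])
    response = model.invoke([SystemMessage(content="Analyze the image."), msg])
    return response.content
```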
@@ -205,7 +213,8 @@ class BasicAgent:
 
         print("File metadata:", os.stat(path))
         with open(path, "rb") as audio_file:
-
+            client = openai.OpenAI()
+            transcription = client.audio.transcriptions.create(
                 file=audio_file,
                 model="whisper-1"
             )
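And the audio hunk in isolation, as a sketch assuming a small wrapper function and that the caller reads the transcript from `transcription.text`; the `client.audio.transcriptions.create(...)` call itself is the openai-python v1 style shown in the diff.

```python
# Sketch only: the hunk's new-style transcription call in an assumed helper.
import openai


def transcribe(path: str) -> str:
    with open(path, "rb") as audio_file:
        client = openai.OpenAI()  # reads OPENAI_API_KEY from the environment
        transcription = client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-1",
        )
    # The v1 client returns a Transcription object; the text is on .text
    return transcription.text
```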