Change OpenAI API call for audio and vision tools
app.py
CHANGED
@@ -177,10 +177,18 @@ class BasicAgent:
 
         with open(path, "rb") as f:
             b64 = b64encode(f.read()).decode()
+
+        file_msg = {
+            "type": "file",
+            "file": {
+                "data": b64,
+                "filename": os.path.basename(path)
+            }
+        }
 
         msg = HumanMessage(content=[
             {"type":"text", "text": question},
-
+            file_msg
         ])
         response = model.invoke([SystemMessage(content="Analyze the image."), msg])
         result = response.content
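For context, a self-contained sketch of the pattern this hunk adopts: the file is base64-encoded and attached as an extra content block next to the text question. The wrapper function, the `ChatOpenAI` class, and the `gpt-4o` model name are assumptions for illustration; only the `file_msg` construction and the `invoke` call mirror the diff.

```python
# Sketch only: the hunk's pattern with assumed surrounding code.
import os
from base64 import b64encode

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI  # assumed; the diff only shows `model`


def analyze_file(path: str, question: str) -> str:
    model = ChatOpenAI(model="gpt-4o")  # assumed model name
    with open(path, "rb") as f:
        b64 = b64encode(f.read()).decode()

    # Same content-block shape the commit adds alongside the text prompt.
    file_msg = {
        "type": "file",
        "file": {
            "data": b64,
            "filename": os.path.basename(path),
        },
    }
    msg = HumanMessage(content=[
        {"type": "text", "text": question},
        file_msg,
    ])
    response = model.invoke([SystemMessage(content="Analyze the image."), msg])
    return response.content
```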
@@ -205,7 +213,8 @@ class BasicAgent:
 
         print("File metadata:", os.stat(path))
         with open(path, "rb") as audio_file:
-
+            client = openai.OpenAI()
+            transcription = client.audio.transcriptions.create(
                 file=audio_file,
                 model="whisper-1"
             )
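And the audio hunk in isolation, as a sketch assuming a small wrapper function and that the caller reads the transcript from `transcription.text`; the `client.audio.transcriptions.create(...)` call itself is the openai-python v1 style shown in the diff.

```python
# Sketch only: the hunk's new-style transcription call in an assumed helper.
import openai


def transcribe(path: str) -> str:
    with open(path, "rb") as audio_file:
        client = openai.OpenAI()  # reads OPENAI_API_KEY from the environment
        transcription = client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-1",
        )
    # The v1 client returns a Transcription object; the text is on .text
    return transcription.text
```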