Fix errors in audio and image tools
Browse files
app.py
CHANGED
|
@@ -184,7 +184,7 @@ class BasicAgent:
|
|
| 184 |
# img.save(img_bytes, format=img.format)
|
| 185 |
# img_bytes.seek(0)
|
| 186 |
|
| 187 |
-
with open(
|
| 188 |
b64 = b64encode(f.read()).decode()
|
| 189 |
|
| 190 |
vision = ChatOpenAI(model="gpt-4o-vision", temperature=0)
|
|
@@ -201,7 +201,7 @@ class BasicAgent:
|
|
| 201 |
@tool(
|
| 202 |
description="Transcribe an audio file with Whisper and answer a question about its content."
|
| 203 |
)
|
| 204 |
-
def audio_qa_tool(path: str, question: str, max_chars: int =
|
| 205 |
"""
|
| 206 |
Args:
|
| 207 |
path: Local filesystem path to an audio file (mp3, wav, etc.).
|
|
@@ -214,24 +214,23 @@ class BasicAgent:
|
|
| 214 |
return f"Error: file not found at {path}"
|
| 215 |
|
| 216 |
print("File metadata:", os.stat(path))
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
response = model(completion_kwargs={"max_tokens": 200})(prompt)
|
| 233 |
-
answer = response.choices[0].text.strip()
|
| 234 |
|
|
|
|
| 235 |
return answer[:max_chars]
|
| 236 |
|
| 237 |
|
|
|
|
| 184 |
# img.save(img_bytes, format=img.format)
|
| 185 |
# img_bytes.seek(0)
|
| 186 |
|
| 187 |
+
with open(path, "rb") as f:
|
| 188 |
b64 = b64encode(f.read()).decode()
|
| 189 |
|
| 190 |
vision = ChatOpenAI(model="gpt-4o-vision", temperature=0)
|
|
|
|
| 201 |
@tool(
|
| 202 |
description="Transcribe an audio file with Whisper and answer a question about its content."
|
| 203 |
)
|
| 204 |
+
def audio_qa_tool(path: str, question: str, max_chars: int = 10000) -> str:
|
| 205 |
"""
|
| 206 |
Args:
|
| 207 |
path: Local filesystem path to an audio file (mp3, wav, etc.).
|
|
|
|
| 214 |
return f"Error: file not found at {path}"
|
| 215 |
|
| 216 |
print("File metadata:", os.stat(path))
|
| 217 |
+
with open(path, "rb") as audio_file:
|
| 218 |
+
transcription = openai.Audio.transcriptions.create(
|
| 219 |
+
file=audio_file,
|
| 220 |
+
model="whisper-1"
|
| 221 |
+
)
|
| 222 |
+
transcript = transcription.text
|
| 223 |
+
prompt = f"""
|
| 224 |
+
Here is a transcript of an audio file:
|
| 225 |
+
'''{transcript}'''
|
| 226 |
+
|
| 227 |
+
Question: '''{question}'''
|
| 228 |
+
|
| 229 |
+
Please answer briefly based on this transcript, and give only the answer.
|
| 230 |
+
"""
|
| 231 |
+
response = model.invoke([{"role": "user", "content": prompt}])
|
|
|
|
|
|
|
| 232 |
|
| 233 |
+
answer = response.content.strip()
|
| 234 |
return answer[:max_chars]
|
| 235 |
|
| 236 |
|