ameglei-external committed on
Commit
7cf180d
·
verified ·
1 Parent(s): a88165c

Fix errors in audio and image tools

Browse files
Files changed (1) hide show
  1. app.py +18 -19
app.py CHANGED
@@ -184,7 +184,7 @@ class BasicAgent:
184
  # img.save(img_bytes, format=img.format)
185
  # img_bytes.seek(0)
186
 
187
- with open("photo.png","rb") as f:
188
  b64 = b64encode(f.read()).decode()
189
 
190
  vision = ChatOpenAI(model="gpt-4o-vision", temperature=0)
@@ -201,7 +201,7 @@ class BasicAgent:
201
  @tool(
202
  description="Transcribe an audio file with Whisper and answer a question about its content."
203
  )
204
- def audio_qa_tool(path: str, question: str, max_chars: int = 2048) -> str:
205
  """
206
  Args:
207
  path: Local filesystem path to an audio file (mp3, wav, etc.).
@@ -214,24 +214,23 @@ class BasicAgent:
214
  return f"Error: file not found at {path}"
215
 
216
  print("File metadata:", os.stat(path))
217
- audio = AudioSegment.from_file(path)
218
- tmp_path = os.path.join(tempfile.gettempdir(), "tmp_audio.wav")
219
- audio.export(tmp_path, format="wav")
220
-
221
- model = whisper.load_model("base")
222
- result = model.transcribe(tmp_path)
223
- transcript = result.get("text", "")
224
-
225
- prompt = f"""Here is the transcript of an audio file:
226
- {transcript}
227
-
228
- Question: {question}
229
-
230
- Please answer briefly based on this transcript, and give only the answer."""
231
-
232
- response = model(completion_kwargs={"max_tokens": 200})(prompt)
233
- answer = response.choices[0].text.strip()
234
 
 
235
  return answer[:max_chars]
236
 
237
 
 
184
  # img.save(img_bytes, format=img.format)
185
  # img_bytes.seek(0)
186
 
187
+ with open(path, "rb") as f:
188
  b64 = b64encode(f.read()).decode()
189
 
190
  vision = ChatOpenAI(model="gpt-4o-vision", temperature=0)
 
201
  @tool(
202
  description="Transcribe an audio file with Whisper and answer a question about its content."
203
  )
204
+ def audio_qa_tool(path: str, question: str, max_chars: int = 10000) -> str:
205
  """
206
  Args:
207
  path: Local filesystem path to an audio file (mp3, wav, etc.).
 
214
  return f"Error: file not found at {path}"
215
 
216
  print("File metadata:", os.stat(path))
217
+ with open(path, "rb") as audio_file:
218
+ transcription = openai.Audio.transcriptions.create(
219
+ file=audio_file,
220
+ model="whisper-1"
221
+ )
222
+ transcript = transcription.text
223
+ prompt = f"""
224
+ Here is a transcript of an audio file:
225
+ '''{transcript}'''
226
+
227
+ Question: '''{question}'''
228
+
229
+ Please answer briefly based on this transcript, and give only the answer.
230
+ """
231
+ response = model.invoke([{"role": "user", "content": prompt}])
 
 
232
 
233
+ answer = response.content.strip()
234
  return answer[:max_chars]
235
 
236