legolasyiu committed on
Commit
d55b3ca
·
verified ·
1 Parent(s): e31676e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -11,6 +11,8 @@ model = AutoModelForImageTextToText.from_pretrained(
11
  device_map="auto"
12
  )
13
 
 
 
14
  def convert_audio_to_text(audio_file):
15
  # Load audio
16
  waveform, sample_rate = torchaudio.load(audio_file)
@@ -19,10 +21,18 @@ def convert_audio_to_text(audio_file):
19
  if waveform.shape[0] > 1:
20
  waveform = waveform.mean(dim=0, keepdim=True)
21
 
22
- prompt = "Transcribe the audio."
 
 
 
 
 
 
 
 
23
 
24
  inputs = processor.apply_chat_template(
25
- prompt,
26
  add_generation_prompt=True,
27
  tokenize=True, return_dict=True,
28
  return_tensors="pt",
 
11
  device_map="auto"
12
  )
13
 
14
+
15
+
16
  def convert_audio_to_text(audio_file):
17
  # Load audio
18
  waveform, sample_rate = torchaudio.load(audio_file)
 
21
  if waveform.shape[0] > 1:
22
  waveform = waveform.mean(dim=0, keepdim=True)
23
 
24
+ messages = [
25
+ {
26
+ "role": "user",
27
+ "content": [
28
+ {"type": "audio", "audio": audio_file},
29
+ {"type": "text", "text": "Transcribe this audio into English, and then translate it into French."},
30
+ ]
31
+ }
32
+ ]
33
 
34
  inputs = processor.apply_chat_template(
35
+ messages,
36
  add_generation_prompt=True,
37
  tokenize=True, return_dict=True,
38
  return_tensors="pt",