Spaces:

EpistemeAI
/

AudioGemma

Sleeping

legolasyiu committed on Jan 19

Commit

d55b3ca

verified ·

1 Parent(s): e31676e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,6 +11,8 @@ model = AutoModelForImageTextToText.from_pretrained(
     device_map="auto"
 )
 def convert_audio_to_text(audio_file):
     # Load audio
     waveform, sample_rate = torchaudio.load(audio_file)
@@ -19,10 +21,18 @@ def convert_audio_to_text(audio_file):
     if waveform.shape[0] > 1:
         waveform = waveform.mean(dim=0, keepdim=True)
-    prompt = "Transcribe the audio."
     inputs = processor.apply_chat_template(
-        prompt,
         add_generation_prompt=True,
         tokenize=True, return_dict=True,
         return_tensors="pt",

     device_map="auto"
 )
 def convert_audio_to_text(audio_file):
     # Load audio
     waveform, sample_rate = torchaudio.load(audio_file)
     if waveform.shape[0] > 1:
         waveform = waveform.mean(dim=0, keepdim=True)
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "audio", "audio": audio_file},
+                {"type": "text", "text": "Transcribe this audio into English, and then translate it into French."},
+            ]
+        }
+    ]
     inputs = processor.apply_chat_template(
+        messages,
         add_generation_prompt=True,
         tokenize=True, return_dict=True,
         return_tensors="pt",