Gijs Wijngaard committed on
Commit
0dbdb6b
·
1 Parent(s): 1782be2
Files changed (1) hide show
  1. app.py +9 -17
app.py CHANGED
@@ -30,23 +30,15 @@ def run_phi4(audio_path: str, instruction: str) -> str:
30
  if not audio_path:
31
  return "Please upload an audio file."
32
 
33
- messages = [
34
- {
35
- "role": "user",
36
- "content": [
37
- {"type": "audio", "url": audio_path},
38
- {"type": "text", "text": "Transcribe the audio to text, and then translate the audio to French. Use <sep> as a separator between the origina transcript and the translation."},
39
- ],
40
- },
41
- ]
42
-
43
- inputs = processor.apply_chat_template(
44
- messages,
45
- add_generation_prompt=True,
46
- tokenize=True,
47
- return_dict=True,
48
- return_tensors="pt",
49
- ).to(model.device)
50
 
51
  output_ids = model.generate(
52
  **inputs,
 
30
  if not audio_path:
31
  return "Please upload an audio file."
32
 
33
+ audio, samplerate = sf.read(audio_path)
34
+
35
+ user_prompt = "<|user|>"
36
+ assistant_prompt = "<|assistant|>"
37
+ prompt_suffix = "<|end|>"
38
+
39
+ prompt = f"{user_prompt}<|audio_1|>{instruction}{prompt_suffix}{assistant_prompt}"
40
+
41
+ inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors="pt").to(model.device)
 
 
 
 
 
 
 
 
42
 
43
  output_ids = model.generate(
44
  **inputs,