BissakaAI committed on
Commit
5d41ed7
·
verified ·
1 Parent(s): c2ca4c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -8,7 +8,7 @@ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
8
  # ----------------------------
9
  # Config
10
  # ----------------------------
11
- ASR_MODEL_ID = "openai/whisper-small"
12
  HF_TOKEN = os.getenv("HF_TOKEN")
13
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
  DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
@@ -76,9 +76,15 @@ def transcribe_audio(audio):
76
 
77
  with torch.no_grad():
78
  generated_ids = model.generate(
79
- **inputs,
80
- max_new_tokens=256
81
- )
 
 
 
 
 
 
82
 
83
  transcription = processor.batch_decode(
84
  generated_ids,
 
8
  # ----------------------------
9
  # Config
10
  # ----------------------------
11
+ ASR_MODEL_ID = "openai/whisper-large-v3"
12
  HF_TOKEN = os.getenv("HF_TOKEN")
13
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
  DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
 
76
 
77
  with torch.no_grad():
78
  generated_ids = model.generate(
79
+ **inputs,
80
+ max_new_tokens=256,
81
+ language=None,
82
+ task="transcribe",
83
+ prompt_ids=processor.get_prompt_ids(
84
+ text="This audio may be in Yoruba, Hausa, Igbo, Nigerian Pidgin or English."
85
+ )
86
+ )
87
+
88
 
89
  transcription = processor.batch_decode(
90
  generated_ids,