hafsaabd82 committed · Commit c026df6 · verified · 1 Parent(s): a2dbf4f

Update app.py

Files changed (1)
  1. app.py +14 -4
app.py CHANGED
@@ -17,6 +17,7 @@ import time
 import shutil
 from starlette.concurrency import run_in_threadpool
 import gc
+from transformers import Wav2Vec2ForSequenceClassification, AutoFeatureExtractor
 try:
     import noisereduce as nr
     HAVE_NOISEREDUCE = True
@@ -50,6 +51,9 @@ model_name = "large-v2"
 ALIGN_MODEL_MAP = {
     "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"}
 global_align_model_cache = {}
+processor = AutoFeatureExtractor.from_pretrained("facebook/mms-lid-4017")
+model = Wav2Vec2ForSequenceClassification.from_pretrained("facebook/mms-lid-4017")
+model.to("cpu")
 class TimelineItem(BaseModel):
     start: float
     end: float
@@ -217,10 +221,16 @@ def analyze_audio(audio_file: str,
     model = whisperx.load_model(model_name, device, compute_type="float32")
     audio_loaded = whisperx.load_audio(audio_for_model)
     print("Detecting language...")
-    lang_result = model.transcribe(audio_loaded, batch_size=4, language=None)
-    language_code_detected = lang_result.get("language") or lang_result.get("detected_language")
-    languageCode = language_code_detected
-    results.languageCode = languageCode
+    inputs = processor(audio_loaded, sampling_rate=target_sr, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs).logits
+    lang_id = torch.argmax(outputs, dim=-1)[0].item()
+    detected_language = model.config.id2label[lang_id]
+    languageCode = detected_language
+    # lang_result = model.transcribe(audio_loaded, batch_size=4, language=None)
+    # language_code_detected = lang_result.get("language") or lang_result.get("detected_language")
+    # languageCode = language_code_detected
+    # results.languageCode = languageCode
     print("Transcribing audio...")
     transcribed_language = "ur"
     result = model.transcribe(audio_loaded, batch_size=BATCH_SIZE, language= transcribed_language
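
For reference, a minimal standalone sketch of the language-identification path this commit introduces. It assumes the input is a 16 kHz mono float32 waveform (the format whisperx.load_audio produces, which is presumably what target_sr refers to in app.py); the helper name detect_language and the silent test clip below are illustrative and not part of the commit.

import numpy as np
import torch
from transformers import AutoFeatureExtractor, Wav2Vec2ForSequenceClassification

# Load the MMS language-ID model once, as the commit does at module level.
processor = AutoFeatureExtractor.from_pretrained("facebook/mms-lid-4017")
model = Wav2Vec2ForSequenceClassification.from_pretrained("facebook/mms-lid-4017")
model.to("cpu").eval()

def detect_language(audio: np.ndarray, sampling_rate: int = 16000) -> str:
    """Return the language label MMS-LID predicts for a mono waveform."""
    inputs = processor(audio, sampling_rate=sampling_rate, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    lang_id = torch.argmax(logits, dim=-1)[0].item()
    return model.config.id2label[lang_id]

# Call shape only: one second of silence stands in for a real clip.
print(detect_language(np.zeros(16000, dtype=np.float32)))

Note that MMS-LID's id2label values are ISO 639-3 style codes (e.g. "urd" for Urdu) rather than the two-letter codes WhisperX expects, so the detected languageCode would need mapping if it were later passed to whisperx; the diff sidesteps this by hard-coding transcribed_language = "ur" for the transcription call.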