Abbas133 committed on
Commit
354513a
·
verified ·
1 Parent(s): 831fa7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -17,10 +17,16 @@ def speech_to_text(audio):
17
  return "No audio provided."
18
 
19
  # Load the audio file
20
- input_audio, sample_rate = torchaudio.load(audio)
 
 
 
 
 
 
21
 
22
  # Process the audio for ASR
23
- input_audio = asr_processor(input_audio, sampling_rate=sample_rate, return_tensors="pt", padding=True)
24
  logits = asr_model(input_audio.input_values).logits
25
  predicted_ids = torch.argmax(logits, dim=-1)
26
  transcription = asr_processor.decode(predicted_ids[0])
 
17
  return "No audio provided."
18
 
19
  # Load the audio file
20
+ input_audio, original_sample_rate = torchaudio.load(audio)
21
+
22
+ # Resample the audio to 16,000 Hz if necessary
23
+ target_sample_rate = 16000
24
+ if original_sample_rate != target_sample_rate:
25
+ resampler = torchaudio.transforms.Resample(orig_freq=original_sample_rate, new_freq=target_sample_rate)
26
+ input_audio = resampler(input_audio)
27
 
28
  # Process the audio for ASR
29
+ input_audio = asr_processor(input_audio, sampling_rate=target_sample_rate, return_tensors="pt", padding=True)
30
  logits = asr_model(input_audio.input_values).logits
31
  predicted_ids = torch.argmax(logits, dim=-1)
32
  transcription = asr_processor.decode(predicted_ids[0])