KuyaToto committed on
Commit
7658fb6
·
verified ·
1 Parent(s): 83e3242

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -3
app.py CHANGED
@@ -13,14 +13,24 @@ model = Wav2Vec2ForCTC.from_pretrained(model_id)
13
  def transcribe(audio_data):
14
  if audio_data is None:
15
  return "⚠️ No audio received."
16
-
17
- audio, sample_rate = audio_data # ✅ Unpack the tuple
18
 
19
- # Resample if needed
 
 
 
 
 
 
 
 
 
20
  if sample_rate != 16000:
21
  number_of_samples = round(len(audio) * 16000 / sample_rate)
22
  audio = resample(audio, number_of_samples)
23
 
 
 
 
24
  # Process and predict
25
  input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
26
  with torch.no_grad():
 
13
  def transcribe(audio_data):
14
  if audio_data is None:
15
  return "⚠️ No audio received."
 
 
16
 
17
+ audio, sample_rate = audio_data
18
+
19
+ # Convert stereo to mono if needed
20
+ if len(audio.shape) == 2:
21
+ audio = np.mean(audio, axis=1)
22
+
23
+ # Ensure sample_rate is an integer
24
+ sample_rate = int(sample_rate)
25
+
26
+ # Resample to 16000 Hz if needed
27
  if sample_rate != 16000:
28
  number_of_samples = round(len(audio) * 16000 / sample_rate)
29
  audio = resample(audio, number_of_samples)
30
 
31
+ # Normalize audio
32
+ audio = audio.astype(np.float32)
33
+
34
  # Process and predict
35
  input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
36
  with torch.no_grad():