Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -60,11 +60,11 @@ def transcribe_audio(audio_path, original_text):
|
|
| 60 |
waveform, sample_rate = torchaudio.load(audio_path)
|
| 61 |
if waveform.shape[0] > 1:
|
| 62 |
waveform = waveform.mean(dim=0, keepdim=True)
|
| 63 |
-
if sample_rate !=
|
| 64 |
-
transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=
|
| 65 |
waveform = transform(waveform)
|
| 66 |
waveform = waveform / waveform.abs().max()
|
| 67 |
-
input_values = processor(waveform.squeeze().numpy(), sampling_rate=
|
| 68 |
with torch.no_grad():
|
| 69 |
logits = model(input_values).logits
|
| 70 |
predicted_ids = torch.argmax(logits, dim=-1)
|
|
@@ -74,7 +74,7 @@ def transcribe_audio(audio_path, original_text):
|
|
| 74 |
df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
|
| 75 |
# Speaking speed
|
| 76 |
transcribed_words = transcription.strip().split()
|
| 77 |
-
duration = waveform.shape[1] /
|
| 78 |
speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
|
| 79 |
result = {
|
| 80 |
"📝 Transcribed Text": transcription,
|
|
|
|
| 60 |
waveform, sample_rate = torchaudio.load(audio_path)
|
| 61 |
if waveform.shape[0] > 1:
|
| 62 |
waveform = waveform.mean(dim=0, keepdim=True)
|
| 63 |
+
if sample_rate != 16000:
|
| 64 |
+
transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
|
| 65 |
waveform = transform(waveform)
|
| 66 |
waveform = waveform / waveform.abs().max()
|
| 67 |
+
input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
|
| 68 |
with torch.no_grad():
|
| 69 |
logits = model(input_values).logits
|
| 70 |
predicted_ids = torch.argmax(logits, dim=-1)
|
|
|
|
| 74 |
df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
|
| 75 |
# Speaking speed
|
| 76 |
transcribed_words = transcription.strip().split()
|
| 77 |
+
duration = waveform.shape[1] / 16000
|
| 78 |
speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
|
| 79 |
result = {
|
| 80 |
"📝 Transcribed Text": transcription,
|