Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ import io
|
|
| 9 |
import os
|
| 10 |
from bs4 import BeautifulSoup
|
| 11 |
import re
|
|
|
|
| 12 |
|
| 13 |
# Load the transcription model
|
| 14 |
transcription_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
|
|
@@ -36,8 +37,11 @@ def transcribe_audio(audio_bytes):
|
|
| 36 |
waveform, sample_rate = torchaudio.load("temp_audio.wav")
|
| 37 |
os.remove("temp_audio.wav")
|
| 38 |
|
|
|
|
|
|
|
|
|
|
| 39 |
# Transcribe the audio
|
| 40 |
-
result = transcription_pipeline(
|
| 41 |
transcript = result['text']
|
| 42 |
|
| 43 |
# Split transcript into paragraphs based on silence
|
|
@@ -47,10 +51,13 @@ def transcribe_audio(audio_bytes):
|
|
| 47 |
|
| 48 |
for chunk in chunks:
|
| 49 |
chunk.export("temp_chunk.wav", format="wav")
|
| 50 |
-
|
| 51 |
os.remove("temp_chunk.wav")
|
| 52 |
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
chunk_transcript = chunk_result['text']
|
| 55 |
|
| 56 |
if chunk_transcript:
|
|
|
|
| 9 |
import os
|
| 10 |
from bs4 import BeautifulSoup
|
| 11 |
import re
|
| 12 |
+
import numpy as np
|
| 13 |
|
| 14 |
# Load the transcription model
|
| 15 |
transcription_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
|
|
|
|
| 37 |
waveform, sample_rate = torchaudio.load("temp_audio.wav")
|
| 38 |
os.remove("temp_audio.wav")
|
| 39 |
|
| 40 |
+
# Convert torch.Tensor to numpy.ndarray
|
| 41 |
+
waveform_np = waveform.numpy().squeeze()
|
| 42 |
+
|
| 43 |
# Transcribe the audio
|
| 44 |
+
result = transcription_pipeline(waveform_np, chunk_length_s=30)
|
| 45 |
transcript = result['text']
|
| 46 |
|
| 47 |
# Split transcript into paragraphs based on silence
|
|
|
|
| 51 |
|
| 52 |
for chunk in chunks:
|
| 53 |
chunk.export("temp_chunk.wav", format="wav")
|
| 54 |
+
waveform_chunk, sample_rate_chunk = torchaudio.load("temp_chunk.wav")
|
| 55 |
os.remove("temp_chunk.wav")
|
| 56 |
|
| 57 |
+
# Convert torch.Tensor to numpy.ndarray
|
| 58 |
+
waveform_chunk_np = waveform_chunk.numpy().squeeze()
|
| 59 |
+
|
| 60 |
+
chunk_result = transcription_pipeline(waveform_chunk_np, chunk_length_s=30)
|
| 61 |
chunk_transcript = chunk_result['text']
|
| 62 |
|
| 63 |
if chunk_transcript:
|