# app.py -- last change: "Update app.py" (commit 938237f, verified), by gmustafa413
import streamlit as st
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
import torch
import librosa
import soundfile as sf
import io
# Load models
@st.cache_resource
def load_models():
    """Build the speech-recognition and summarization models used by the app.

    The function is wrapped in ``st.cache_resource`` so the returned objects
    can be reused instead of being rebuilt on every call.

    Returns:
        A ``(processor, model, summarizer)`` tuple: the Wav2Vec2 processor,
        the Wav2Vec2 CTC model, and a summarization pipeline.
    """
    asr_checkpoint = "facebook/wav2vec2-base-960h"
    summarization_checkpoint = "facebook/bart-large-cnn"

    speech_processor = Wav2Vec2Processor.from_pretrained(asr_checkpoint)
    speech_model = Wav2Vec2ForCTC.from_pretrained(asr_checkpoint)
    text_summarizer = pipeline("summarization", model=summarization_checkpoint)

    return speech_processor, speech_model, text_summarizer
# Obtain the shared model objects once at module import; audio_to_text() and
# summarize_text() below read these module-level names.
processor, model, summarizer = load_models()
# Function to convert audio to text
def audio_to_text(audio_file):
    """Transcribe an uploaded audio file with the Wav2Vec2 CTC model.

    Args:
        audio_file: Binary file-like object supporting ``read`` and ``seek``
            (for example the object returned by ``st.file_uploader``).

    Returns:
        The decoded transcription string, or ``None`` when any step fails.
        Failures are reported to the user through ``st.error``.
    """
    target_sr = 16000  # sampling rate passed to the processor below

    try:
        # Rewind before reading so an already-consumed upload is still read
        # in full, and again afterwards so the caller can reuse the object.
        audio_file.seek(0)
        audio_bytes = audio_file.read()
        audio_file.seek(0)

        # Use soundfile to decode the raw bytes
        with io.BytesIO(audio_bytes) as f:
            data, samplerate = sf.read(f)

        # soundfile yields a 2-D (frames, channels) array for multi-channel
        # files; average the channels so the resampler and the processor
        # receive a single 1-D mono waveform.
        if data.ndim > 1:
            data = data.mean(axis=1)

        # Resample to 16kHz if necessary
        if samplerate != target_sr:
            data = librosa.resample(data, orig_sr=samplerate, target_sr=target_sr)

        # Convert to input values for the model
        input_values = processor(
            data, return_tensors="pt", sampling_rate=target_sr
        ).input_values

        # Perform inference without tracking gradients
        with torch.no_grad():
            logits = model(input_values).logits

        # Greedy decoding: pick the highest-scoring token id at each step
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.decode(predicted_ids[0])
        return transcription
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.error(f"Error processing audio: {e}")
        return None
# Function to summarize text
def summarize_text(text):
    """Summarize a transcription with the module-level summarization pipeline.

    Args:
        text: The transcribed text to summarize.

    Returns:
        The summary string, or ``None`` when the text is shorter than 10
        characters after stripping (a warning is shown) or when summarization
        raises (the error is shown through ``st.error``).
    """
    try:
        if len(text.strip()) < 10:  # Check if the text is too short
            st.warning("The transcribed text is too short to summarize.")
            return None

        # truncation=True asks the pipeline to cut inputs that exceed the
        # model's maximum input length instead of failing on long transcripts.
        summary = summarizer(
            text,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.error(f"Error summarizing text: {e}")
        return None
# Streamlit app
def main():
    """Render the upload form and, on request, show transcription and summary.

    Flow: the user uploads a WAV or MP3 file, the file is previewed in an
    audio player, and pressing "Generate Summary" runs ``audio_to_text``
    followed by ``summarize_text``, writing each result to the page.
    """
    st.title("Audio Summarization App")
    st.write("Upload an audio file (WAV or MP3) to get a summary of its content.")

    # File uploader
    audio_file = st.file_uploader("Upload Audio File", type=["wav", "mp3"])
    if audio_file is None:
        return

    # Preview using the upload's own MIME type (MP3 uploads are not
    # "audio/wav"); fall back to the previous default if none is available.
    audio_format = getattr(audio_file, "type", None) or "audio/wav"
    st.audio(audio_file, format=audio_format)

    # Process the audio file only when the user asks for it
    if not st.button("Generate Summary"):
        return

    with st.spinner("Processing audio..."):
        # Convert audio to text
        text = audio_to_text(audio_file)
        if not text:
            st.error("Failed to transcribe the audio. Please check the file format and try again.")
            return

        st.subheader("Transcribed Text:")
        st.write(text)

        # Summarize the text
        summary = summarize_text(text)
        if summary:
            st.subheader("Summary:")
            st.write(summary)
        else:
            st.warning("No summary generated. The transcribed text may be too short or unclear.")
# Start the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()