"""Streamlit app that transcribes an uploaded audio file and summarizes it.

Speech recognition uses the ``facebook/wav2vec2-base-960h`` CTC model and
summarization uses ``facebook/bart-large-cnn``.
"""

import io

import librosa
import soundfile as sf
import streamlit as st
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline

# Sampling rate passed to the Wav2Vec2 processor; audio at any other rate is
# resampled to this value before inference.
TARGET_SAMPLE_RATE = 16000


# Load models
@st.cache_resource
def load_models():
    """Load the speech-recognition and summarization models.

    Decorated with ``st.cache_resource`` so Streamlit reuses the loaded
    objects instead of re-creating them on every script rerun.

    Returns:
        A ``(processor, model, summarizer)`` tuple: the Wav2Vec2 processor,
        the Wav2Vec2 CTC model, and a summarization pipeline.
    """
    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return processor, model, summarizer


processor, model, summarizer = load_models()


# Function to convert audio to text
def audio_to_text(audio_file):
    """Transcribe an uploaded audio file to text.

    Args:
        audio_file: A seekable binary file-like object (for example the
            object returned by ``st.file_uploader``) holding audio data in a
            format that ``soundfile`` can decode.

    Returns:
        The transcription string, or ``None`` if the audio could not be
        processed. Failures are reported to the user through ``st.error``.
    """
    try:
        # Rewind first so the whole file is read even if a previous consumer
        # left the pointer elsewhere, and rewind again afterwards so later
        # consumers see the file from the start.
        audio_file.seek(0)
        audio_bytes = audio_file.read()
        audio_file.seek(0)

        # Decode straight to float32 so no float64 copy is carried around.
        with io.BytesIO(audio_bytes) as buffer:
            data, samplerate = sf.read(buffer, dtype="float32")

        # soundfile returns (frames, channels) for multi-channel audio.
        # Down-mix to mono *before* resampling: librosa resamples along the
        # last axis, which would otherwise be the channel axis, and the
        # processor is given a single 1-D waveform.
        if data.ndim > 1:
            data = data.mean(axis=1)

        if data.size == 0:
            raise ValueError("the audio file contains no samples")

        # Resample to 16kHz if necessary
        if samplerate != TARGET_SAMPLE_RATE:
            data = librosa.resample(
                data, orig_sr=samplerate, target_sr=TARGET_SAMPLE_RATE
            )

        # Convert to input values for the model
        input_values = processor(
            data, return_tensors="pt", sampling_rate=TARGET_SAMPLE_RATE
        ).input_values

        # Perform inference without tracking gradients
        with torch.no_grad():
            logits = model(input_values).logits

        # Greedy CTC decoding: pick the most likely token for every frame
        predicted_ids = torch.argmax(logits, dim=-1)
        return processor.decode(predicted_ids[0])
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        st.error(f"Error processing audio: {e}")
        return None


# Function to summarize text
def summarize_text(text):
    """Summarize ``text`` with the summarization pipeline.

    Args:
        text: The text to summarize.

    Returns:
        The summary string, or ``None`` when the text is shorter than 10
        characters after stripping whitespace or when summarization fails.
        Both cases are reported through ``st.warning`` / ``st.error``.
    """
    try:
        if len(text.strip()) < 10:  # Check if the text is too short
            st.warning("The transcribed text is too short to summarize.")
            return None

        # truncation=True clips over-long transcripts to the model's maximum
        # input length instead of letting the pipeline raise on them.
        summary = summarizer(
            text,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        st.error(f"Error summarizing text: {e}")
        return None


# Streamlit app
def main():
    """Render the page: upload, preview, transcribe and summarize audio."""
    st.title("Audio Summarization App")
    st.write("Upload an audio file (WAV or MP3) to get a summary of its content.")

    # File uploader
    audio_file = st.file_uploader("Upload Audio File", type=["wav", "mp3"])
    if audio_file is None:
        return

    # Preview with the upload's own MIME type so MP3 files are not labelled
    # as WAV; fall back to WAV when the type is unavailable.
    st.audio(audio_file, format=getattr(audio_file, "type", None) or "audio/wav")

    # Process the audio file
    if not st.button("Generate Summary"):
        return

    with st.spinner("Processing audio..."):
        # Convert audio to text
        text = audio_to_text(audio_file)
        if not text:
            st.error(
                "Failed to transcribe the audio. "
                "Please check the file format and try again."
            )
            return

        st.subheader("Transcribed Text:")
        st.write(text)

        # Summarize the text
        summary = summarize_text(text)
        if summary:
            st.subheader("Summary:")
            st.write(summary)
        else:
            st.warning(
                "No summary generated. "
                "The transcribed text may be too short or unclear."
            )


if __name__ == "__main__":
    main()