"""Streamlit app: transcribe uploaded audio with Wav2Vec2, then run spaCy NER
and BART summarization on the resulting (or directly entered) text."""

import spacy
import streamlit as st
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer, pipeline

# facebook/wav2vec2-base-960h was trained on 16 kHz speech; feeding it audio
# at any other sample rate silently degrades the transcription.
TARGET_SAMPLE_RATE = 16_000

nlp_ner = spacy.load("en_core_web_sm")


@st.cache_resource
def _load_asr():
    """Load the Wav2Vec2 model and tokenizer once per server process.

    Streamlit reruns the whole script on every widget interaction; without
    caching, each uploaded file would re-download/re-load the model.
    """
    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
    tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
    return model, tokenizer


@st.cache_resource
def _load_summarizer():
    """Load the BART summarization pipeline once per server process."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def ner(text):
    """Return (entity_text, entity_label) tuples found by spaCy in *text*."""
    doc = nlp_ner(text)
    return [(ent.text, ent.label_) for ent in doc.ents]


def main():
    """Render the UI: choose audio upload or direct text entry, then analyze."""
    st.title("Text and Speech Analysis")

    option = st.radio("Choose an option:", ("Upload Audio", "Enter Text"))

    if option == "Upload Audio":
        audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])
        if audio_file is not None:
            text_result = process_input(audio_file)
            st.success("Audio processed successfully!")
            st.text(text_result)
            process_and_display_text(text_result)
    elif option == "Enter Text":
        text_input = st.text_area("Enter your text here:")
        if st.button("Submit"):
            if text_input:
                process_and_display_text(text_input)


def process_input(audio_input):
    """Transcribe an uploaded audio file to text with Wav2Vec2.

    Parameters
    ----------
    audio_input : file-like
        The uploaded audio file (wav/mp3) from ``st.file_uploader``.

    Returns
    -------
    str
        The decoded transcription.
    """
    model, tokenizer = _load_asr()

    waveform, sample_rate = torchaudio.load(audio_input)

    # Downmix multi-channel (e.g. stereo) audio to mono: the tokenizer
    # expects a 1-D waveform, and .squeeze() alone would leave shape (2, n).
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample to the 16 kHz rate the model was trained on.
    if sample_rate != TARGET_SAMPLE_RATE:
        waveform = torchaudio.functional.resample(
            waveform, orig_freq=sample_rate, new_freq=TARGET_SAMPLE_RATE
        )

    input_values = tokenizer(
        waveform.squeeze().numpy(), return_tensors="pt"
    ).input_values

    with torch.no_grad():
        logits = model(input_values).logits

    prediction_ids = torch.argmax(logits, dim=-1)
    transcription = tokenizer.batch_decode(prediction_ids)[0]
    return transcription


def process_and_display_text(input_text):
    """Run NER and summarization on *input_text* and render both in the UI."""
    summarization_pipeline = _load_summarizer()

    # Map each distinct entity string to the label of its first occurrence.
    # (dict(set(...)) would pick an arbitrary label when the same text
    # appears with multiple labels, since set ordering is nondeterministic.)
    ner_results = {}
    for entity_text, entity_label in ner(input_text):
        ner_results.setdefault(entity_text, entity_label)

    summary = summarization_pipeline(
        input_text,
        max_length=150,
        min_length=50,
        length_penalty=2.0,
        num_beams=4,
        temperature=0.7,
    )

    st.write("Named Entities")
    st.table(ner_results)
    st.write("Summary")
    st.write(summary[0]["summary_text"])


if __name__ == "__main__":
    main()