"""Meeting Audio Analyzer — a Streamlit app.

Upload a meeting recording to get a full transcript (Whisper), a summary
(DistilBART), heuristic action items, and key topics. Runs fully locally.
"""
# Standard library
import os
import tempfile

# Third party
import streamlit as st
import torch
from transformers import pipeline

# Page chrome — set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="Meeting Audio Analyzer", page_icon="🎙️", layout="wide")
st.title("🎙️ Meeting Audio Analyzer")
st.caption("Upload a meeting recording — get a full transcript, summary, action items, and key topics.")
@st.cache_resource
def load_models():
    """Build and cache the ASR and summarization pipelines.

    Streamlit re-executes the entire script on every widget interaction,
    so without ``st.cache_resource`` both models would be re-instantiated
    (minutes of work) on every click of the Analyze button. The cache
    keeps one shared copy per server process.

    Returns:
        tuple: ``(transcriber, summarizer)`` — Hugging Face pipelines for
        automatic speech recognition (Whisper base, 30 s chunks with 5 s
        stride) and summarization (DistilBART CNN 12-6).
    """
    # Use the first GPU when available, otherwise CPU (-1), for both models.
    device = 0 if torch.cuda.is_available() else -1
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",
        chunk_length_s=30,
        stride_length_s=5,
        return_timestamps=False,
        device=device,
    )
    summarizer = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        device=device,
    )
    return transcriber, summarizer
# Load the heavyweight models (or fetch them from the resource cache) up front.
with st.spinner("Loading models (first run takes ~2 minutes)..."):
    transcriber, summarizer = load_models()
def chunk_text(text, max_tokens=900):
    """Split *text* into pieces of at most *max_tokens* whitespace-separated words.

    Args:
        text: Input string; split on any whitespace.
        max_tokens: Maximum word count per chunk.

    Returns:
        list[str]: Chunks of exactly ``max_tokens`` words each, except the
        last which holds the remainder. Empty input yields an empty list.
    """
    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
def summarize_transcript(transcript):
    """Summarize a transcript, map-reducing inputs longer than 900 words.

    Short transcripts are summarized in a single pass. Longer ones are
    split into ~900-word chunks, each chunk is summarized independently,
    and the concatenated partial summaries (re-capped to 900 words) are
    summarized one final time.

    Args:
        transcript: Full meeting transcript text.

    Returns:
        str: The summary, or a fallback message for blank input.
    """
    if not transcript.strip():
        return "No transcript available."

    # One-shot path: small enough for a single model call.
    if len(transcript.split()) <= 900:
        out = summarizer(transcript, max_length=200, min_length=60, do_sample=False)
        return out[0]["summary_text"]

    # Map: summarize each chunk on its own.
    partials = [
        summarizer(piece, max_length=150, min_length=40, do_sample=False)[0]["summary_text"]
        for piece in chunk_text(transcript)
    ]

    # Reduce: join the partial summaries, cap back to 900 words, summarize once more.
    merged = " ".join(partials)
    merged_words = merged.split()
    if len(merged_words) > 900:
        merged = " ".join(merged_words[:900])
    final = summarizer(merged, max_length=250, min_length=80, do_sample=False)
    return final[0]["summary_text"]
def extract_action_items(transcript):
    """Heuristically pull action-item sentences out of a transcript.

    A sentence (period-delimited, longer than 15 characters) counts as an
    action item when its lowercased text contains any commitment marker
    ("will ", "deadline", "follow up", ...). At most ten are returned.

    Args:
        transcript: Full meeting transcript text.

    Returns:
        str: Bulleted action items joined by newlines, or a fallback
        message when none are detected.
    """
    markers = (
        "will ", "should ", "need to ", "must ", "action:", "todo:",
        "follow up", "follow-up", "assign", "deadline", "by next",
        "responsible", "let's ", "we'll ", "i'll ", "you'll ",
    )
    found = []
    for raw in transcript.replace("\n", " ").split("."):
        sentence = raw.strip()
        if len(sentence) <= 15:
            continue  # too short to be a meaningful sentence
        lowered = sentence.lower()
        if any(marker in lowered for marker in markers):
            found.append(f"• {sentence}.")
    if not found:
        return "No specific action items detected."
    return "\n".join(found[:10])
def extract_key_topics(summary):
    """Extract up to eight key topic words from a summary string.

    Words are lowercased, stripped of surrounding punctuation, dropped if
    they are stop words or four characters or shorter, then ranked by
    frequency (ties keep first-appearance order) and title-cased.

    Args:
        summary: Free-form summary text.

    Returns:
        str: Topic words joined by " • ", or a fallback message when no
        candidate words remain.
    """
    # Local import: stdlib Counter replaces the hand-rolled frequency dict
    # and full sort; most_common(8) takes the top-k directly and breaks
    # ties by insertion (first-appearance) order, matching a stable sort.
    from collections import Counter

    stop_words = {
        "the","a","an","is","are","was","were","be","been","being","have",
        "has","had","do","does","did","will","would","could","should","may",
        "might","and","but","or","of","in","on","at","by","for","with",
        "to","from","this","that","it","its","they","we","you","he","she",
        "also","if","any","then","what","which","who","how","all","each",
        "very","just","too","than","both","about","into","through","these",
    }
    cleaned = (w.strip(".,!?;:()'\"") for w in summary.lower().split())
    freq = Counter(
        w for w in cleaned if w and w not in stop_words and len(w) > 3
    )
    top = [word for word, _ in freq.most_common(8)]
    return " • ".join(t.title() for t in top) if top else "Could not extract topics."
uploaded_file = st.file_uploader(
    "Upload your meeting audio",
    type=["mp3", "wav", "m4a", "ogg", "webm", "flac"],
)

if uploaded_file is not None:
    st.audio(uploaded_file)

    if st.button("Analyze Meeting", type="primary", use_container_width=True):
        # Persist the upload to disk: the ASR pipeline takes a file path.
        extension = os.path.splitext(uploaded_file.name)[1] or ".mp3"
        with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as handle:
            handle.write(uploaded_file.read())
            tmp_path = handle.name

        try:
            with st.spinner("Transcribing audio..."):
                transcript = transcriber(tmp_path)["text"].strip()

            if not transcript:
                st.error("Transcription produced no text. Try a clearer audio file.")
            else:
                with st.spinner("Analyzing meeting content..."):
                    summary = summarize_transcript(transcript)
                    actions = extract_action_items(transcript)
                    topics = extract_key_topics(summary)

                n_words = len(transcript.split())
                st.success(f"Done! {n_words} words transcribed — ~{n_words // 130 + 1} min read")

                summary_tab, actions_tab, topics_tab, transcript_tab = st.tabs(
                    ["Summary", "Action Items", "Key Topics", "Full Transcript"]
                )
                with summary_tab:
                    st.subheader("Meeting Summary")
                    st.write(summary)
                with actions_tab:
                    st.subheader("Action Items")
                    st.text(actions)
                with topics_tab:
                    st.subheader("Key Topics")
                    st.write(topics)
                with transcript_tab:
                    st.subheader("Full Transcript")
                    st.text_area("", transcript, height=400, label_visibility="collapsed")
        except Exception as e:
            # Top-level UI boundary: report the failure instead of crashing the app.
            st.error(f"Error during processing: {str(e)}")
        finally:
            # Always remove the temp file, even when transcription/analysis fails.
            os.unlink(tmp_path)

st.divider()
st.caption("Models: Whisper Base · DistilBART CNN — runs fully locally, no API keys needed.")