# WhatTheAudio / app.py
import streamlit as st
import torch
from transformers import pipeline
import tempfile
import os
st.set_page_config(page_title="Meeting Audio Analyzer", page_icon="🎙️", layout="wide")
st.title("🎙️ Meeting Audio Analyzer")
st.caption("Upload a meeting recording — get a full transcript, summary, action items, and key topics.")
@st.cache_resource
def load_models():
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",
        chunk_length_s=30,
        stride_length_s=5,
        return_timestamps=False,
        device=0 if torch.cuda.is_available() else -1,
    )
    summarizer = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        device=0 if torch.cuda.is_available() else -1,
    )
    return transcriber, summarizer
with st.spinner("Loading models (first run takes ~2 minutes)..."):
    transcriber, summarizer = load_models()
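# Split long text into ~900-word chunks, a rough word-count proxy for the summarizer's input limit.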
def chunk_text(text, max_tokens=900):
    words = text.split()
    chunks, current = [], []
    for word in words:
        current.append(word)
        if len(current) >= max_tokens:
            chunks.append(" ".join(current))
            current = []
    if current:
        chunks.append(" ".join(current))
    return chunks
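# Map-reduce summarization: short transcripts are summarized directly; long ones are summarized
# chunk by chunk, then the combined chunk summaries are summarized one final time.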
def summarize_transcript(transcript):
    if not transcript.strip():
        return "No transcript available."
    word_count = len(transcript.split())
    if word_count <= 900:
        result = summarizer(transcript, max_length=200, min_length=60, do_sample=False)
        return result[0]["summary_text"]
    chunks = chunk_text(transcript)
    chunk_summaries = []
    for chunk in chunks:
        r = summarizer(chunk, max_length=150, min_length=40, do_sample=False)
        chunk_summaries.append(r[0]["summary_text"])
    combined = " ".join(chunk_summaries)
    if len(combined.split()) > 900:
        combined = " ".join(combined.split()[:900])
    final = summarizer(combined, max_length=250, min_length=80, do_sample=False)
    return final[0]["summary_text"]
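# Heuristic action-item extraction: keep sentences containing commitment or assignment keywords.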
def extract_action_items(transcript):
    keywords = [
        "will ", "should ", "need to ", "must ", "action:", "todo:",
        "follow up", "follow-up", "assign", "deadline", "by next",
        "responsible", "let's ", "we'll ", "i'll ", "you'll ",
    ]
    sentences = [s.strip() for s in transcript.replace("\n", " ").split(".") if len(s.strip()) > 15]
    actions = [f"• {s}." for s in sentences if any(k in s.lower() for k in keywords)]
    return "\n".join(actions[:10]) if actions else "No specific action items detected."
def extract_key_topics(summary):
    stop_words = {
        "the","a","an","is","are","was","were","be","been","being","have",
        "has","had","do","does","did","will","would","could","should","may",
        "might","and","but","or","of","in","on","at","by","for","with",
        "to","from","this","that","it","its","they","we","you","he","she",
        "also","if","any","then","what","which","who","how","all","each",
        "very","just","too","than","both","about","into","through","these",
    }
    words = summary.lower().split()
    freq = {}
    for w in words:
        w = w.strip(".,!?;:()'\"")
        if w and w not in stop_words and len(w) > 3:
            freq[w] = freq.get(w, 0) + 1
    top = sorted(freq, key=freq.get, reverse=True)[:8]
    return " • ".join(t.title() for t in top) if top else "Could not extract topics."
uploaded_file = st.file_uploader(
"Upload your meeting audio",
type=["mp3", "wav", "m4a", "ogg", "webm", "flac"],
)
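# Once a file is uploaded, preview it and run the full analysis when the user clicks the button.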
if uploaded_file is not None:
    st.audio(uploaded_file)
    if st.button("Analyze Meeting", type="primary", use_container_width=True):
        suffix = os.path.splitext(uploaded_file.name)[1] or ".mp3"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(uploaded_file.read())
            tmp_path = tmp.name
        try:
            with st.spinner("Transcribing audio..."):
                result = transcriber(tmp_path)
                transcript = result["text"].strip()
            if not transcript:
                st.error("Transcription produced no text. Try a clearer audio file.")
            else:
                with st.spinner("Analyzing meeting content..."):
                    summary = summarize_transcript(transcript)
                    actions = extract_action_items(transcript)
                    topics = extract_key_topics(summary)
                word_count = len(transcript.split())
                st.success(f"Done! {word_count} words transcribed — ~{word_count // 130 + 1} min read")
                tab1, tab2, tab3, tab4 = st.tabs(["Summary", "Action Items", "Key Topics", "Full Transcript"])
                with tab1:
                    st.subheader("Meeting Summary")
                    st.write(summary)
                with tab2:
                    st.subheader("Action Items")
                    st.text(actions)
                with tab3:
                    st.subheader("Key Topics")
                    st.write(topics)
                with tab4:
                    st.subheader("Full Transcript")
                    st.text_area("Transcript", transcript, height=400, label_visibility="collapsed")
        except Exception as e:
            st.error(f"Error during processing: {str(e)}")
        finally:
            os.unlink(tmp_path)
st.divider()
st.caption("Models: Whisper Base · DistilBART CNN — runs fully locally, no API keys needed.")