Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,32 +1,33 @@
|
|
| 1 |
-
import
|
| 2 |
import torch
|
| 3 |
from transformers import pipeline
|
|
|
|
| 4 |
import os
|
| 5 |
|
| 6 |
-
|
| 7 |
-
transcriber = pipeline(
|
| 8 |
-
"automatic-speech-recognition",
|
| 9 |
-
model="openai/whisper-base",
|
| 10 |
-
chunk_length_s=30,
|
| 11 |
-
stride_length_s=5,
|
| 12 |
-
return_timestamps=False,
|
| 13 |
-
device=0 if torch.cuda.is_available() else -1,
|
| 14 |
-
)
|
| 15 |
-
|
| 16 |
-
print("Loading summarization model...")
|
| 17 |
-
summarizer = pipeline(
|
| 18 |
-
"summarization",
|
| 19 |
-
model="sshleifer/distilbart-cnn-12-6",
|
| 20 |
-
device=0 if torch.cuda.is_available() else -1,
|
| 21 |
-
)
|
| 22 |
|
| 23 |
-
|
|
|
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
return result["text"].strip()
|
| 29 |
-
|
| 30 |
|
| 31 |
def chunk_text(text, max_tokens=900):
|
| 32 |
words = text.split()
|
|
@@ -40,176 +41,106 @@ def chunk_text(text, max_tokens=900):
|
|
| 40 |
chunks.append(" ".join(current))
|
| 41 |
return chunks
|
| 42 |
|
| 43 |
-
|
| 44 |
def summarize_transcript(transcript):
|
| 45 |
if not transcript.strip():
|
| 46 |
-
return "No transcript available
|
| 47 |
-
|
| 48 |
word_count = len(transcript.split())
|
| 49 |
-
|
| 50 |
if word_count <= 900:
|
| 51 |
-
result = summarizer(
|
| 52 |
-
transcript,
|
| 53 |
-
max_length=200,
|
| 54 |
-
min_length=60,
|
| 55 |
-
do_sample=False,
|
| 56 |
-
)
|
| 57 |
return result[0]["summary_text"]
|
| 58 |
-
|
| 59 |
-
chunks = chunk_text(transcript, max_tokens=900)
|
| 60 |
chunk_summaries = []
|
| 61 |
for chunk in chunks:
|
| 62 |
r = summarizer(chunk, max_length=150, min_length=40, do_sample=False)
|
| 63 |
chunk_summaries.append(r[0]["summary_text"])
|
| 64 |
-
|
| 65 |
combined = " ".join(chunk_summaries)
|
| 66 |
if len(combined.split()) > 900:
|
| 67 |
combined = " ".join(combined.split()[:900])
|
| 68 |
-
|
| 69 |
final = summarizer(combined, max_length=250, min_length=80, do_sample=False)
|
| 70 |
return final[0]["summary_text"]
|
| 71 |
|
| 72 |
-
|
| 73 |
def extract_action_items(transcript):
|
| 74 |
-
|
| 75 |
-
"will ", "should ", "need to ", "must ", "action:",
|
| 76 |
-
"
|
| 77 |
-
"
|
| 78 |
-
]
|
| 79 |
-
sentences = [
|
| 80 |
-
s.strip()
|
| 81 |
-
for s in transcript.replace("\n", " ").split(".")
|
| 82 |
-
if len(s.strip()) > 15
|
| 83 |
]
|
| 84 |
-
|
| 85 |
-
for
|
| 86 |
-
|
| 87 |
-
if any(kw in lower for kw in action_keywords):
|
| 88 |
-
actions.append(f"• {sentence.strip()}.")
|
| 89 |
-
|
| 90 |
-
if not actions:
|
| 91 |
-
return "No specific action items detected."
|
| 92 |
-
return "\n".join(actions[:10])
|
| 93 |
-
|
| 94 |
|
| 95 |
def extract_key_topics(summary):
|
| 96 |
stop_words = {
|
| 97 |
-
"the",
|
| 98 |
-
"
|
| 99 |
-
"
|
| 100 |
-
"
|
| 101 |
-
"
|
| 102 |
-
"just",
|
| 103 |
-
"at", "by", "for", "with", "about", "into", "through", "during",
|
| 104 |
-
"before", "after", "to", "from", "up", "down", "out", "this",
|
| 105 |
-
"that", "these", "those", "it", "its", "they", "their", "there",
|
| 106 |
-
"we", "our", "you", "your", "he", "she", "his", "her", "also",
|
| 107 |
-
"if", "any", "then", "what", "which", "who", "how", "all", "each",
|
| 108 |
}
|
| 109 |
words = summary.lower().split()
|
| 110 |
freq = {}
|
| 111 |
for w in words:
|
| 112 |
-
|
| 113 |
-
if
|
| 114 |
-
freq[
|
| 115 |
-
|
| 116 |
top = sorted(freq, key=freq.get, reverse=True)[:8]
|
| 117 |
-
if not
|
| 118 |
-
return "Topics could not be extracted."
|
| 119 |
-
return " • ".join(t.title() for t in top)
|
| 120 |
|
| 121 |
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
if not transcript:
|
| 129 |
-
return ("Transcription produced no text. Try a clearer audio file.", "", "", "", "")
|
| 130 |
|
| 131 |
-
|
| 132 |
-
actions = extract_action_items(transcript)
|
| 133 |
-
topics = extract_key_topics(summary)
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
| 137 |
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
|
|
|
|
|
|
| 143 |
|
| 144 |
-
|
| 145 |
-
title="Meeting Audio Analyzer",
|
| 146 |
-
theme=gr.themes.Soft(),
|
| 147 |
-
css="""
|
| 148 |
-
#title { text-align: center; margin-bottom: 0.5rem; }
|
| 149 |
-
#subtitle { text-align: center; color: #666; margin-bottom: 1.5rem; font-size: 0.95rem; }
|
| 150 |
-
footer { display: none !important; }
|
| 151 |
-
""",
|
| 152 |
-
) as demo:
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
elem_id="subtitle",
|
| 158 |
-
)
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
label="Upload Meeting Audio",
|
| 164 |
-
type="filepath",
|
| 165 |
-
sources=["upload"],
|
| 166 |
-
)
|
| 167 |
-
analyze_btn = gr.Button("Analyze Meeting", variant="primary", size="lg")
|
| 168 |
-
stats_out = gr.Markdown(value="", label="")
|
| 169 |
-
|
| 170 |
-
with gr.Column(scale=2):
|
| 171 |
-
with gr.Tabs():
|
| 172 |
-
with gr.TabItem("Summary"):
|
| 173 |
-
summary_out = gr.Textbox(
|
| 174 |
-
label="Meeting Summary",
|
| 175 |
-
lines=8,
|
| 176 |
-
interactive=False,
|
| 177 |
-
placeholder="Summary will appear here after analysis...",
|
| 178 |
-
)
|
| 179 |
-
with gr.TabItem("Action Items"):
|
| 180 |
-
actions_out = gr.Textbox(
|
| 181 |
-
label="Action Items",
|
| 182 |
-
lines=8,
|
| 183 |
-
interactive=False,
|
| 184 |
-
placeholder="Action items will appear here...",
|
| 185 |
-
)
|
| 186 |
-
with gr.TabItem("Key Topics"):
|
| 187 |
-
topics_out = gr.Textbox(
|
| 188 |
-
label="Key Topics",
|
| 189 |
-
lines=3,
|
| 190 |
-
interactive=False,
|
| 191 |
-
placeholder="Key topics will appear here...",
|
| 192 |
-
)
|
| 193 |
-
with gr.TabItem("Full Transcript"):
|
| 194 |
-
transcript_out = gr.Textbox(
|
| 195 |
-
label="Full Transcript",
|
| 196 |
-
lines=15,
|
| 197 |
-
interactive=False,
|
| 198 |
-
placeholder="Full transcript will appear here...",
|
| 199 |
-
)
|
| 200 |
-
|
| 201 |
-
analyze_btn.click(
|
| 202 |
-
fn=analyze_meeting,
|
| 203 |
-
inputs=[audio_input],
|
| 204 |
-
outputs=[transcript_out, summary_out, actions_out, topics_out, stats_out],
|
| 205 |
-
show_progress=True,
|
| 206 |
-
)
|
| 207 |
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
import torch
|
| 3 |
from transformers import pipeline
|
| 4 |
+
import tempfile
|
| 5 |
import os
|
| 6 |
|
| 7 |
+
# Page chrome: browser-tab metadata, then the visible heading and tagline.
PAGE_SETTINGS = {
    "page_title": "Meeting Audio Analyzer",
    "page_icon": "🎙️",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)

st.title("🎙️ Meeting Audio Analyzer")
st.caption(
    "Upload a meeting recording — get a full transcript, summary, "
    "action items, and key topics."
)
|
| 11 |
|
| 12 |
+
@st.cache_resource
def load_models():
    """Build the speech-recognition and summarization pipelines.

    Returns:
        A ``(transcriber, summarizer)`` tuple of ``transformers`` pipelines.
        Both receive device index ``0`` when ``torch.cuda.is_available()``
        is true and ``-1`` otherwise.
    """
    # Same device choice for both pipelines, so decide it once.
    device = 0 if torch.cuda.is_available() else -1

    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",
        chunk_length_s=30,
        stride_length_s=5,
        return_timestamps=False,
        device=device,
    )
    abstractive = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        device=device,
    )
    return asr, abstractive


# Module-level handles used by the analysis code further down the script.
with st.spinner("Loading models (first run takes ~2 minutes)..."):
    transcriber, summarizer = load_models()
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def chunk_text(text, max_tokens=900):
|
| 33 |
words = text.split()
|
|
|
|
| 41 |
chunks.append(" ".join(current))
|
| 42 |
return chunks
|
| 43 |
|
|
|
|
| 44 |
def summarize_transcript(transcript):
    """Summarize a meeting transcript with the module-level ``summarizer``.

    Transcripts of up to 900 words are summarized in a single pass. Longer
    ones are split with ``chunk_text``, each chunk is summarized on its own,
    and the joined chunk summaries (capped at 900 words) are summarized once
    more to produce the final text.

    Args:
        transcript: Transcript text to summarize.

    Returns:
        The summary text, or ``"No transcript available."`` when the
        transcript is empty or whitespace-only.
    """
    if not transcript.strip():
        return "No transcript available."

    max_words = 900

    def run(text, max_length, min_length):
        # The 900-word cap counts words, not tokens, so an input can still
        # tokenize past what the model accepts. truncation=True makes the
        # pipeline clip such input instead of failing.
        # NOTE(review): the model's actual token limit is not visible here;
        # confirm against the model card.
        result = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )
        return result[0]["summary_text"]

    if len(transcript.split()) <= max_words:
        return run(transcript, 200, 60)

    # Long transcript: summarize each chunk, then summarize the summaries.
    partials = [run(chunk, 150, 40) for chunk in chunk_text(transcript)]
    combined = " ".join(partials)
    combined_words = combined.split()
    if len(combined_words) > max_words:
        combined = " ".join(combined_words[:max_words])

    return run(combined, 250, 80)
|
| 61 |
|
|
|
|
| 62 |
def extract_action_items(transcript):
    """Pick out sentences of ``transcript`` that look like action items.

    A sentence qualifies when it is longer than 15 characters (ignoring its
    closing punctuation) and contains one of the action keywords,
    case-insensitively. At most the first 10 matches are returned.

    Sentences are split after ``.``, ``!`` or ``?`` only when whitespace
    follows, so decimals such as "3.5" stay intact and questions are not
    glued to the following sentence.

    Args:
        transcript: Transcript text to scan.

    Returns:
        A newline-separated list of "• sentence" bullets, or
        ``"No specific action items detected."`` when nothing matches.
    """
    import re  # function-scope so this block does not rely on the file header

    keywords = (
        "will ", "should ", "need to ", "must ", "action:", "todo:",
        "follow up", "follow-up", "assign", "deadline", "by next",
        "responsible", "let's ", "we'll ", "i'll ", "you'll ",
    )

    # Collapse newlines and repeated spaces so splitting sees one flat line.
    flat = " ".join(transcript.split())

    actions = []
    for sentence in re.split(r"(?<=[.!?])\s+", flat):
        core = sentence.rstrip(".!?").strip()
        if len(core) <= 15:
            continue
        # Trailing space lets keywords like "must " match at sentence end.
        haystack = core.lower() + " "
        if any(k in haystack for k in keywords):
            # Keep the speaker's own closing punctuation; add "." if absent.
            text = sentence if sentence[-1] in ".!?" else core + "."
            actions.append(f"• {text}")

    if not actions:
        return "No specific action items detected."
    return "\n".join(actions[:10])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
def extract_key_topics(summary):
    """Return the most frequent content words of ``summary`` as a topic line.

    Words are lower-cased, stripped of surrounding punctuation, and counted
    when they are longer than three characters and not stop words. The eight
    most frequent words are title-cased and joined with " • "; ties keep
    first-seen order.

    Args:
        summary: Summary text to mine for topics.

    Returns:
        The joined topic string, or ``"Could not extract topics."`` when no
        word qualifies.
    """
    import re  # function-scope so this block does not rely on the file header

    stop_words = {
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have",
        "has", "had", "do", "does", "did", "will", "would", "could", "should", "may",
        "might", "and", "but", "or", "of", "in", "on", "at", "by", "for", "with",
        "to", "from", "this", "that", "it", "its", "they", "we", "you", "he", "she",
        "also", "if", "any", "then", "what", "which", "who", "how", "all", "each",
        "very", "just", "too", "than", "both", "about", "into", "through", "these",
    }

    freq = {}
    for raw in summary.lower().split():
        word = raw.strip(".,!?;:()'\"")
        if len(word) > 3 and word not in stop_words:
            freq[word] = freq.get(word, 0) + 1

    top = sorted(freq, key=freq.get, reverse=True)[:8]
    if not top:
        return "Could not extract topics."

    def title_case(word):
        # str.title() upper-cases the letter after an apostrophe
        # ("team's" -> "Team'S"); capitalize each letter run instead and
        # treat an apostrophe suffix as part of the same run.
        return re.sub(
            r"[^\W\d_]+(?:['’][^\W\d_]+)?",
            lambda match: match.group(0).capitalize(),
            word,
        )

    return " • ".join(title_case(t) for t in top)
|
|
|
|
|
|
|
| 89 |
|
| 90 |
|
| 91 |
+
uploaded_file = st.file_uploader(
    "Upload your meeting audio",
    type=["mp3", "wav", "m4a", "ogg", "webm", "flac"],
)

if uploaded_file is not None:
    st.audio(uploaded_file)

    if st.button("Analyze Meeting", type="primary", use_container_width=True):
        # The transcriber is called with a file path, so the upload is written
        # to a temporary file that keeps the original extension.
        suffix = os.path.splitext(uploaded_file.name)[1] or ".mp3"
        tmp_path = None

        try:
            # Created inside the try so a failed write is reported through
            # st.error and the file is still removed by the finally clause.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp_path = tmp.name
                # getvalue() returns the whole upload regardless of the
                # buffer's current read position.
                tmp.write(uploaded_file.getvalue())

            with st.spinner("Transcribing audio..."):
                result = transcriber(tmp_path)
                transcript = result["text"].strip()

            if not transcript:
                st.error("Transcription produced no text. Try a clearer audio file.")
            else:
                with st.spinner("Analyzing meeting content..."):
                    summary = summarize_transcript(transcript)
                    actions = extract_action_items(transcript)
                    topics = extract_key_topics(summary)

                word_count = len(transcript.split())
                st.success(f"Done! {word_count} words transcribed — ~{word_count // 130 + 1} min read")

                tab1, tab2, tab3, tab4 = st.tabs(["Summary", "Action Items", "Key Topics", "Full Transcript"])

                with tab1:
                    st.subheader("Meeting Summary")
                    st.write(summary)

                with tab2:
                    st.subheader("Action Items")
                    st.text(actions)

                with tab3:
                    st.subheader("Key Topics")
                    st.write(topics)

                with tab4:
                    st.subheader("Full Transcript")
                    # A non-empty label is kept for accessibility; it stays
                    # hidden through label_visibility.
                    st.text_area("Full transcript", transcript, height=400, label_visibility="collapsed")

        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            st.error(f"Error during processing: {str(e)}")
        finally:
            if tmp_path is not None and os.path.exists(tmp_path):
                os.unlink(tmp_path)

st.divider()
st.caption("Models: Whisper Base · DistilBART CNN — runs fully locally, no API keys needed.")
|