Spaces:

dwishank
/

WhatTheAudio

Sleeping

App Files Files Community

dwishank committed about 1 month ago

Commit

f3f662d

verified ·

1 Parent(s): 69bae60

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -157

app.py CHANGED Viewed

@@ -1,32 +1,33 @@
-import gradio as gr
 import torch
 from transformers import pipeline
 import os
-print("Loading Whisper transcription model...")
-transcriber = pipeline(
-    "automatic-speech-recognition",
-    model="openai/whisper-base",
-    chunk_length_s=30,
-    stride_length_s=5,
-    return_timestamps=False,
-    device=0 if torch.cuda.is_available() else -1,
-)
-print("Loading summarization model...")
-summarizer = pipeline(
-    "summarization",
-    model="sshleifer/distilbart-cnn-12-6",
-    device=0 if torch.cuda.is_available() else -1,
-)
-print("Models ready.")
-def transcribe_audio(audio_path):
-    result = transcriber(audio_path)
-    return result["text"].strip()
 def chunk_text(text, max_tokens=900):
     words = text.split()
@@ -40,176 +41,106 @@ def chunk_text(text, max_tokens=900):
         chunks.append(" ".join(current))
     return chunks
 def summarize_transcript(transcript):
     if not transcript.strip():
-        return "No transcript available to summarize."
     word_count = len(transcript.split())
     if word_count <= 900:
-        result = summarizer(
-            transcript,
-            max_length=200,
-            min_length=60,
-            do_sample=False,
-        )
         return result[0]["summary_text"]
-    chunks = chunk_text(transcript, max_tokens=900)
     chunk_summaries = []
     for chunk in chunks:
         r = summarizer(chunk, max_length=150, min_length=40, do_sample=False)
         chunk_summaries.append(r[0]["summary_text"])
     combined = " ".join(chunk_summaries)
     if len(combined.split()) > 900:
         combined = " ".join(combined.split()[:900])
     final = summarizer(combined, max_length=250, min_length=80, do_sample=False)
     return final[0]["summary_text"]
 def extract_action_items(transcript):
-    action_keywords = [
-        "will ", "should ", "need to ", "must ", "action:",
-        "todo:", "follow up", "follow-up", "assign", "deadline",
-        "by next", "responsible", "let's ", "we'll ", "i'll ", "you'll ",
-    ]
-    sentences = [
-        s.strip()
-        for s in transcript.replace("\n", " ").split(".")
-        if len(s.strip()) > 15
     ]
-    actions = []
-    for sentence in sentences:
-        lower = sentence.lower()
-        if any(kw in lower for kw in action_keywords):
-            actions.append(f"• {sentence.strip()}.")
-    if not actions:
-        return "No specific action items detected."
-    return "\n".join(actions[:10])
 def extract_key_topics(summary):
     stop_words = {
-        "the", "a", "an", "is", "are", "was", "were", "be", "been",
-        "being", "have", "has", "had", "do", "does", "did", "will",
-        "would", "could", "should", "may", "might", "shall", "can",
-        "and", "but", "or", "nor", "so", "yet", "both", "either",
-        "neither", "not", "only", "own", "same", "than", "too", "very",
-        "just", "because", "as", "until", "while", "of", "in", "on",
-        "at", "by", "for", "with", "about", "into", "through", "during",
-        "before", "after", "to", "from", "up", "down", "out", "this",
-        "that", "these", "those", "it", "its", "they", "their", "there",
-        "we", "our", "you", "your", "he", "she", "his", "her", "also",
-        "if", "any", "then", "what", "which", "who", "how", "all", "each",
     }
     words = summary.lower().split()
     freq = {}
     for w in words:
-        w_clean = w.strip(".,!?;:()'\"")
-        if w_clean and w_clean not in stop_words and len(w_clean) > 3:
-            freq[w_clean] = freq.get(w_clean, 0) + 1
     top = sorted(freq, key=freq.get, reverse=True)[:8]
-    if not top:
-        return "Topics could not be extracted."
-    return "  •  ".join(t.title() for t in top)
-def analyze_meeting(audio_file):
-    if audio_file is None:
-        return ("Please upload an audio file.", "", "", "", "")
-    try:
-        transcript = transcribe_audio(audio_file)
-        if not transcript:
-            return ("Transcription produced no text. Try a clearer audio file.", "", "", "", "")
-        summary = summarize_transcript(transcript)
-        actions = extract_action_items(transcript)
-        topics = extract_key_topics(summary)
-        word_count = len(transcript.split())
-        stats = f"📊 {word_count} words transcribed  |  ~{word_count // 130 + 1} min read"
-        return transcript, summary, actions, topics, stats
-    except Exception as e:
-        return (f"Error during processing: {str(e)}", "", "", "", "")
-with gr.Blocks(
-    title="Meeting Audio Analyzer",
-    theme=gr.themes.Soft(),
-    css="""
-        #title { text-align: center; margin-bottom: 0.5rem; }
-        #subtitle { text-align: center; color: #666; margin-bottom: 1.5rem; font-size: 0.95rem; }
-        footer { display: none !important; }
-    """,
-) as demo:
-    gr.Markdown("# Meeting Audio Analyzer", elem_id="title")
-    gr.Markdown(
-        "Upload a meeting recording — get a full transcript, summary, action items, and key topics.",
-        elem_id="subtitle",
-    )
-    with gr.Row():
-        with gr.Column(scale=1):
-            audio_input = gr.Audio(
-                label="Upload Meeting Audio",
-                type="filepath",
-                sources=["upload"],
-            )
-            analyze_btn = gr.Button("Analyze Meeting", variant="primary", size="lg")
-            stats_out = gr.Markdown(value="", label="")
-        with gr.Column(scale=2):
-            with gr.Tabs():
-                with gr.TabItem("Summary"):
-                    summary_out = gr.Textbox(
-                        label="Meeting Summary",
-                        lines=8,
-                        interactive=False,
-                        placeholder="Summary will appear here after analysis...",
-                    )
-                with gr.TabItem("Action Items"):
-                    actions_out = gr.Textbox(
-                        label="Action Items",
-                        lines=8,
-                        interactive=False,
-                        placeholder="Action items will appear here...",
-                    )
-                with gr.TabItem("Key Topics"):
-                    topics_out = gr.Textbox(
-                        label="Key Topics",
-                        lines=3,
-                        interactive=False,
-                        placeholder="Key topics will appear here...",
-                    )
-                with gr.TabItem("Full Transcript"):
-                    transcript_out = gr.Textbox(
-                        label="Full Transcript",
-                        lines=15,
-                        interactive=False,
-                        placeholder="Full transcript will appear here...",
-                    )
-    analyze_btn.click(
-        fn=analyze_meeting,
-        inputs=[audio_input],
-        outputs=[transcript_out, summary_out, actions_out, topics_out, stats_out],
-        show_progress=True,
-    )
-    gr.Markdown(
-        "Models: [Whisper Base](https://huggingface.co/openai/whisper-base) · "
-        "[DistilBART CNN](https://huggingface.co/sshleifer/distilbart-cnn-12-6) — "
-        "runs fully locally, no API keys needed."
-    )
-if __name__ == "__main__":
-    demo.launch(share=True)

+import streamlit as st
 import torch
 from transformers import pipeline
+import tempfile
 import os
+st.set_page_config(page_title="Meeting Audio Analyzer", page_icon="🎙️", layout="wide")
+st.title("🎙️ Meeting Audio Analyzer")
+st.caption("Upload a meeting recording — get a full transcript, summary, action items, and key topics.")
+@st.cache_resource
+def load_models():
+    transcriber = pipeline(
+        "automatic-speech-recognition",
+        model="openai/whisper-base",
+        chunk_length_s=30,
+        stride_length_s=5,
+        return_timestamps=False,
+        device=0 if torch.cuda.is_available() else -1,
+    )
+    summarizer = pipeline(
+        "summarization",
+        model="sshleifer/distilbart-cnn-12-6",
+        device=0 if torch.cuda.is_available() else -1,
+    )
+    return transcriber, summarizer
+with st.spinner("Loading models (first run takes ~2 minutes)..."):
+    transcriber, summarizer = load_models()
 def chunk_text(text, max_tokens=900):
     words = text.split()
         chunks.append(" ".join(current))
     return chunks
 def summarize_transcript(transcript):
     if not transcript.strip():
+        return "No transcript available."
     word_count = len(transcript.split())
     if word_count <= 900:
+        result = summarizer(transcript, max_length=200, min_length=60, do_sample=False)
         return result[0]["summary_text"]
+    chunks = chunk_text(transcript)
     chunk_summaries = []
     for chunk in chunks:
         r = summarizer(chunk, max_length=150, min_length=40, do_sample=False)
         chunk_summaries.append(r[0]["summary_text"])
     combined = " ".join(chunk_summaries)
     if len(combined.split()) > 900:
         combined = " ".join(combined.split()[:900])
     final = summarizer(combined, max_length=250, min_length=80, do_sample=False)
     return final[0]["summary_text"]
 def extract_action_items(transcript):
+    keywords = [
+        "will ", "should ", "need to ", "must ", "action:", "todo:",
+        "follow up", "follow-up", "assign", "deadline", "by next",
+        "responsible", "let's ", "we'll ", "i'll ", "you'll ",
     ]
+    sentences = [s.strip() for s in transcript.replace("\n", " ").split(".") if len(s.strip()) > 15]
+    actions = [f"• {s}." for s in sentences if any(k in s.lower() for k in keywords)]
+    return "\n".join(actions[:10]) if actions else "No specific action items detected."
 def extract_key_topics(summary):
     stop_words = {
+        "the","a","an","is","are","was","were","be","been","being","have",
+        "has","had","do","does","did","will","would","could","should","may",
+        "might","and","but","or","of","in","on","at","by","for","with",
+        "to","from","this","that","it","its","they","we","you","he","she",
+        "also","if","any","then","what","which","who","how","all","each",
+        "very","just","too","than","both","about","into","through","these",
     }
     words = summary.lower().split()
     freq = {}
     for w in words:
+        w = w.strip(".,!?;:()'\"")
+        if w and w not in stop_words and len(w) > 3:
+            freq[w] = freq.get(w, 0) + 1
     top = sorted(freq, key=freq.get, reverse=True)[:8]
+    return "  •  ".join(t.title() for t in top) if top else "Could not extract topics."
+uploaded_file = st.file_uploader(
+    "Upload your meeting audio",
+    type=["mp3", "wav", "m4a", "ogg", "webm", "flac"],
+)
+if uploaded_file is not None:
+    st.audio(uploaded_file)
+    if st.button("Analyze Meeting", type="primary", use_container_width=True):
+        suffix = os.path.splitext(uploaded_file.name)[1] or ".mp3"
+        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+            tmp.write(uploaded_file.read())
+            tmp_path = tmp.name
+        try:
+            with st.spinner("Transcribing audio..."):
+                result = transcriber(tmp_path)
+                transcript = result["text"].strip()
+            if not transcript:
+                st.error("Transcription produced no text. Try a clearer audio file.")
+            else:
+                with st.spinner("Analyzing meeting content..."):
+                    summary = summarize_transcript(transcript)
+                    actions = extract_action_items(transcript)
+                    topics = extract_key_topics(summary)
+                word_count = len(transcript.split())
+                st.success(f"Done!  {word_count} words transcribed — ~{word_count // 130 + 1} min read")
+                tab1, tab2, tab3, tab4 = st.tabs(["Summary", "Action Items", "Key Topics", "Full Transcript"])
+                with tab1:
+                    st.subheader("Meeting Summary")
+                    st.write(summary)
+                with tab2:
+                    st.subheader("Action Items")
+                    st.text(actions)
+                with tab3:
+                    st.subheader("Key Topics")
+                    st.write(topics)
+                with tab4:
+                    st.subheader("Full Transcript")
+                    st.text_area("", transcript, height=400, label_visibility="collapsed")
+        except Exception as e:
+            st.error(f"Error during processing: {str(e)}")
+        finally:
+            os.unlink(tmp_path)
+st.divider()
+st.caption("Models: Whisper Base · DistilBART CNN — runs fully locally, no API keys needed.")