Spaces:

Phani1008
/

AI_Summarizer

Sleeping

App Files Files Community

Phani1008 committed on Dec 5, 2025

Commit

44f5184

verified ·

1 Parent(s): 349f090

Create app.py

Browse files

Files changed (1) hide show

app.py +248 -0

app.py ADDED Viewed

	@@ -0,0 +1,248 @@

+import os
+import tempfile
+import textwrap
+
+import streamlit as st
+import whisper
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# =========================
+# 1. PAGE CONFIG
+# =========================
+st.set_page_config(
+    page_title="AI Meeting & Lecture Summarizer",
+    page_icon="🧠",
+    layout="wide"
+)
+st.title("🧠 AI Meeting & Lecture Summarizer")
+st.write(
+    """
+    Upload a **recorded class / meeting audio file** or paste a **transcript**, and this app will:
+    - ✅ Transcribe (if audio)
+    - 📝 Summarize the content
+    - ✅ Extract **Action Items**
+    - 📌 Extract **Key Points / Decisions**
+    """
+)
+# =========================
+# 2. LOAD MODELS (CACHED)
+# =========================
+@st.cache_resource
+def load_whisper_model(model_name: str = "small"):
+    """
+    Load Whisper ASR model once and cache it.
+    """
+    asr_model = whisper.load_model(model_name)
+    return asr_model
+@st.cache_resource
+def load_summarization_model():
+    """
+    Load BART summarization model and tokenizer once and cache them.
+    """
+    model_name = "facebook/bart-large-cnn"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+    return tokenizer, model
+tokenizer, summarizer_model = load_summarization_model()
+# =========================
+# 3. CORE SUMMARIZATION FUNCTIONS
+# =========================
+def summarize_text(text, max_len=120, min_len=40):
+    """
+    Summarize a given text using the BART model.
+    """
+    # 1. Tokenize text
+    inputs = tokenizer(
+        text,
+        max_length=1024,    # BART max input length
+        truncation=True,
+        return_tensors="pt"
+    )
+    # 2. Generate summary token IDs
+    summary_ids = summarizer_model.generate(
+        inputs["input_ids"],
+        num_beams=5,
+        length_penalty=1.2,
+        no_repeat_ngram_size=3,
+        max_length=max_len,
+        min_length=min_len,
+        temperature=0.7,
+        top_k=50,
+        top_p=0.95,
+        early_stopping=True,
+    )
+    # 3. Decode token IDs to text
+    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+    return summary
+def summarize_long_text(text, max_chunk_chars=3000, max_len=120, min_len=40):
+    """
+    Handle long transcripts by:
+    1. Splitting into chunks
+    2. Summarizing each chunk
+    3. Summarizing the combined summaries
+    """
+    if len(text) <= max_chunk_chars:
+        return summarize_text(text, max_len=max_len, min_len=min_len)
+    # Split by lines into manageable chunks
+    chunks = []
+    current_chunk = ""
+    for line in text.splitlines():
+        if len(current_chunk) + len(line) + 1 <= max_chunk_chars:
+            current_chunk += " " + line
+        else:
+            chunks.append(current_chunk.strip())
+            current_chunk = line
+    if current_chunk.strip():
+        chunks.append(current_chunk.strip())
+    partial_summaries = []
+    for i, chunk in enumerate(chunks, start=1):
+        with st.spinner(f"Summarizing chunk {i}/{len(chunks)} (length={len(chunk)} chars)..."):
+            part = summarize_text(chunk, max_len=max_len, min_len=min_len)
+            partial_summaries.append(part)
+    # Combine partial summaries and summarize again
+    combined = " ".join(partial_summaries)
+    final_summary = summarize_text(combined, max_len=max_len, min_len=min_len)
+    return final_summary
+def extract_action_items(transcript_text: str) -> str:
+    """
+    Use summarization to extract action items via an instruction-style prompt.
+    """
+    prompt = (
+        "You are an assistant that reads a meeting or class transcript and extracts ACTION ITEMS.\n"
+        "Action items are specific tasks that someone needs to do in the future.\n\n"
+        "Transcript:\n"
+        f"{transcript_text}\n\n"
+        "Now list the ACTION ITEMS as clear bullet points:\n"
+        "- "
+    )
+    action_items_summary = summarize_long_text(
+        prompt,
+        max_chunk_chars=3000,
+        max_len=200,
+        min_len=60
+    )
+    return action_items_summary
+def extract_key_points(transcript_text: str) -> str:
+    """
+    Use summarization to extract key points and decisions via an instruction-style prompt.
+    """
+    prompt = (
+        "You are an assistant that reads a meeting or lecture transcript and extracts the KEY POINTS.\n"
+        "Key points are the most important ideas, topics discussed, and decisions made.\n\n"
+        "Transcript:\n"
+        f"{transcript_text}\n\n"
+        "Now summarize the KEY POINTS and DECISIONS as bullet points:\n"
+        "- "
+    )
+    key_points_summary = summarize_long_text(
+        prompt,
+        max_chunk_chars=3000,
+        max_len=220,
+        min_len=80
+    )
+    return key_points_summary
+# =========================
+# 4. SIDEBAR: INPUT MODE
+# =========================
+st.sidebar.header("Input Options")
+input_mode = st.sidebar.radio(
+    "Choose input type:",
+    ["Upload Audio File", "Paste Transcript Text"]
+)
+# =========================
+# 5. MAIN LOGIC
+# =========================
+transcript_text = None
+if input_mode == "Upload Audio File":
+    st.subheader("🎙 Upload your meeting / lecture recording")
+    audio_file = st.file_uploader(
+        "Upload an audio/video file (mp3, wav, m4a, mp4, etc.)",
+        type=["mp3", "wav", "m4a", "mp4"]
+    )
+    if audio_file is not None:
+        st.audio(audio_file)
+        if st.button("Transcribe & Summarize"):
+            with st.spinner("Loading Whisper model and transcribing audio..."):
+                asr_model = load_whisper_model("small")
+                # Save uploaded file to disk for Whisper
+                temp_filename = "temp_uploaded_audio"
+                with open(temp_filename, "wb") as f:
+                    f.write(audio_file.read())
+                # Transcribe
+                result = asr_model.transcribe(temp_filename, language="en")
+                transcript_text = result["text"]
+            st.success("✅ Transcription complete!")
+            st.write("### 📄 Transcript (preview)")
+            st.write(textwrap.shorten(transcript_text, width=1000, placeholder=" ..."))
+elif input_mode == "Paste Transcript Text":
+    st.subheader("📄 Paste your transcript text")
+    transcript_text = st.text_area(
+        "Paste the full transcript here (meeting, class, lecture, etc.)",
+        height=250
+    )
+    if transcript_text.strip() == "":
+        transcript_text = None
+# =========================
+# 6. RUN SUMMARIZATION ONCE WE HAVE TRANSCRIPT
+# =========================
+if transcript_text:
+    st.markdown("---")
+    st.subheader("🧠 AI Analysis")
+    if st.button("Generate Summary & Action Items"):
+        with st.spinner("Summarizing the transcript..."):
+            main_summary = summarize_long_text(transcript_text)
+        with st.spinner("Extracting action items..."):
+            action_items = extract_action_items(transcript_text)
+        with st.spinner("Extracting key points..."):
+            key_points = extract_key_points(transcript_text)
+        # Display results
+        st.markdown("### 📝 Main Summary")
+        st.write(main_summary)
+        st.markdown("### ✅ Action Items")
+        st.write(action_items)
+        st.markdown("### 📌 Key Points & Decisions")
+        st.write(key_points)
+else:
+    st.info("👆 Upload an audio file or paste a transcript to get started.")