Spaces:

AishaniS
/

WhatsAppChatSummarizer

Sleeping

App Files Files Community

AishaniS committed on Dec 29, 2025

Commit

d7d160d

verified ·

1 Parent(s): bc79921

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +115 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,117 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import time
+import re
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+# 1. PAGE CONFIGURATION
+# Sets the browser-tab title and icon and switches the page to the wide layout.
+# NOTE(review): Streamlit has historically required set_page_config to be the
+# first st.* command in the script - keep it above every other Streamlit call
+# unless the deployed Streamlit version is confirmed to allow otherwise.
+st.set_page_config(page_title="WhatsApp Chat Analyzer", page_icon="📱", layout="wide")
+# 2. LOAD MODEL FROM HUGGING FACE
+# We use @st.cache_resource so it only downloads once
+@st.cache_resource
+def _load_pipeline_cached():
+    """Build the summarization pipeline from the Hugging Face Hub.
+
+    Any exception raised while loading propagates to the caller. Keeping the
+    error handling out of this cached function means only a successfully
+    built pipeline is ever stored by st.cache_resource.
+    """
+    model_id = "AishaniS/text_summarizer"  # Your specific HF repository
+    # Load directly from the Hub
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+    return pipeline("summarization", model=model, tokenizer=tokenizer)
+def load_pipeline():
+    """Return the cached summarization pipeline, or None if loading failed.
+
+    On failure the error is shown in the UI and None is returned. Because the
+    failure is handled here, outside the cached helper, a transient Hub or
+    network error is not remembered as a permanent "no model" result: the
+    next script rerun attempts the load again.
+    """
+    try:
+        return _load_pipeline_cached()
+    except Exception as e:
+        # Broad catch is deliberate: this is the app's top-level boundary and
+        # the UI must stay up whatever the loader raises.
+        st.error(f"Error loading model from Hugging Face: {e}")
+        return None
+summarizer = load_pipeline()
+# 3. PREPROCESSING FUNCTION (Corrected for your Date/Time format)
+# Timestamp as written in a WhatsApp export: date, comma, time, with optional
+# seconds and an optional AM/PM marker, e.g. "24/12/25, 09:38" or
+# "12/24/25, 9:38:05 PM".
+_WA_TIMESTAMP = r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}(?::\d{2})?(?:\s?[AaPp][Mm])?'
+# Line prefix in either the "[timestamp] " or the "timestamp - " layout.
+# Anchored at the start so timestamp-like text inside a message is kept.
+_WA_LINE_PREFIX = re.compile(
+    r'^(?:\[' + _WA_TIMESTAMP + r'\]\s|' + _WA_TIMESTAMP + r'\s-\s)'
+)
+# Byte-order mark and left-to-right / right-to-left marks that can precede a
+# line; str.strip() does not treat them as whitespace.
+_WA_INVISIBLE_MARKS = '\ufeff\u200e\u200f'
+# Substrings identifying lines that carry no conversational content.
+_WA_SYSTEM_MARKERS = (
+    "<Media omitted>",
+    "Messages and calls are end-to-end encrypted",
+)
+def clean_whatsapp_log(text: str) -> str:
+    """Strip timestamps and system lines from an exported WhatsApp chat.
+
+    Recognised line prefixes:
+        "24/12/25, 09:38 - Name: Message"
+        "12/24/25, 9:38 PM - Name: Message"
+        "[24/12/25, 09:38:12] Name: Message"
+
+    Only a timestamp at the very start of a line is removed; the same
+    pattern appearing later in a message body is left intact. Lines that
+    contain a system marker are dropped, as are lines left empty after
+    cleaning. Lines without a timestamp (e.g. continuation lines of a
+    multi-line message) are kept as they are, minus surrounding whitespace.
+
+    Args:
+        text: Raw contents of the exported chat file.
+
+    Returns:
+        The remaining "Name: Message" lines joined with newlines, or an
+        empty string if nothing is left.
+    """
+    clean_lines = []
+    for line in text.split('\n'):
+        # Filter system messages
+        if any(marker in line for marker in _WA_SYSTEM_MARKERS):
+            continue
+        # Drop whitespace and invisible marks so the anchored pattern can match
+        line = line.strip().lstrip(_WA_INVISIBLE_MARKS).lstrip()
+        # Remove the leading timestamp only
+        cleaned_line = _WA_LINE_PREFIX.sub('', line, count=1).strip()
+        # Only add if text remains
+        if cleaned_line:
+            clean_lines.append(cleaned_line)
+    return "\n".join(clean_lines)
+# 4. CHUNKING FUNCTION (To handle long chats)
+def chunk_text(text: str, max_chars: int = 2000) -> list:
+    """Group the lines of *text* into chunks of roughly *max_chars* characters.
+
+    Lines are never split: a chunk is closed as soon as adding the next line
+    would bring it to max_chars or more, and a single line longer than
+    max_chars becomes a chunk of its own. Every line keeps a trailing
+    newline inside its chunk.
+
+    Args:
+        text: Newline-separated text to split.
+        max_chars: Size at which the current chunk is closed.
+
+    Returns:
+        A list of chunk strings in input order. No chunk is empty.
+    """
+    chunks = []
+    pending = []  # lines (with newline) collected for the chunk in progress
+    pending_len = 0
+    for line in text.split('\n'):
+        # Flush only a non-empty buffer: an oversized line arriving while the
+        # buffer is empty must not produce an empty chunk.
+        if pending and pending_len + len(line) >= max_chars:
+            chunks.append("".join(pending))
+            pending = []
+            pending_len = 0
+        pending.append(line + "\n")
+        pending_len += len(line) + 1
+    if pending:
+        chunks.append("".join(pending))
+    return chunks
+# 5. MAIN UI
+# Upper bound on how many chunks are summarized, to keep the response fast.
+MAX_CHUNKS = 3
+st.title("📱 Real-Time WhatsApp Summarizer")
+st.markdown(f"**Model:** `AishaniS/text_summarizer` | **Status:** {'✅ Loaded' if summarizer else '❌ Error'}")
+st.markdown("Upload your exported `_chat.txt` file to analyze conversation.")
+uploaded_file = st.file_uploader("Choose a file", type=['txt'])
+if uploaded_file and summarizer:
+    # "utf-8-sig" drops a leading byte-order mark when the export has one, and
+    # errors="replace" keeps a stray invalid byte from crashing the whole app.
+    raw_text = uploaded_file.getvalue().decode("utf-8-sig", errors="replace")
+    # Preprocess
+    clean_text = clean_whatsapp_log(raw_text)
+    # Layout: Two columns
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("📜 Processed Chat")
+        st.text_area("Cleaned Input", clean_text, height=400)
+    with col2:
+        st.subheader("🤖 AI Summary")
+        if st.button("Generate Summary"):
+            if not clean_text:
+                st.warning("Chat is empty after cleaning. Check the file format.")
+            else:
+                with st.spinner("Analyzing..."):
+                    # perf_counter is monotonic, so the measured interval
+                    # cannot be skewed by system clock adjustments.
+                    start_time = time.perf_counter()  # Latency Timer Start
+                    # Skip blank chunks so none of the limited slots is
+                    # spent summarizing nothing.
+                    chunks = [c for c in chunk_text(clean_text) if c.strip()]
+                    summary_parts = []
+                    # Summarize only the first MAX_CHUNKS chunks to keep it fast
+                    for i, chunk in enumerate(chunks[:MAX_CHUNKS]):
+                        try:
+                            res = summarizer(chunk, max_length=128, min_length=30, do_sample=False)
+                            summary_parts.append(res[0]['summary_text'])
+                        except Exception as e:
+                            # Best effort: one failing chunk should not
+                            # discard the summaries of the others.
+                            st.warning(f"Could not summarize chunk {i+1}: {e}")
+                    final_summary = " ".join(summary_parts)
+                    latency = time.perf_counter() - start_time  # Latency Timer End
+                    if final_summary:
+                        st.success(final_summary)
+                    else:
+                        # Every chunk failed; do not render an empty success box.
+                        st.error("No summary could be generated for this chat.")
+                    if len(chunks) > MAX_CHUNKS:
+                        # Make the truncation visible instead of silently
+                        # ignoring the rest of the conversation.
+                        st.caption(f"Only the first {MAX_CHUNKS} of {len(chunks)} chunks were summarized.")
+                    st.info(f"⏱️ Model Latency: {latency:.2f} seconds")