AishaniS committed on
Commit
d7d160d
·
verified ·
1 Parent(s): bc79921

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +115 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,117 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import time
3
+ import re
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
5
 
6
# 1. PAGE CONFIGURATION
# Browser-tab title and icon, plus the wide layout used by the two-column view.
st.set_page_config(
    page_title="WhatsApp Chat Analyzer",
    page_icon="📱",
    layout="wide",
)
8
+
9
+ # 2. LOAD MODEL FROM HUGGING FACE
10
+ # We use @st.cache_resource so it only downloads once
11
@st.cache_resource
def _load_pipeline_cached():
    """Download the tokenizer and model from the Hub and build the pipeline.

    Cached with ``st.cache_resource`` so a successful load is reused instead
    of being downloaded again.  Errors are deliberately NOT caught here:
    only a successful return value is cached, so a failed attempt is retried
    on the next rerun instead of a ``None`` being cached for good.

    Returns:
        The ``transformers`` summarization pipeline.

    Raises:
        Exception: whatever ``from_pretrained`` / ``pipeline`` raise when the
            repository cannot be fetched or loaded.
    """
    model_id = "AishaniS/text_summarizer"  # Your specific HF repository

    # Load directly from the Hub
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    return pipeline("summarization", model=model, tokenizer=tokenizer)


def load_pipeline():
    """Return the summarization pipeline, or ``None`` if it cannot be loaded.

    Thin, uncached wrapper around ``_load_pipeline_cached``.  Any load error
    is shown in the UI with ``st.error`` and turned into ``None`` so the rest
    of the script can display an error status instead of crashing.
    """
    try:
        return _load_pipeline_cached()
    except Exception as e:  # UI boundary: report every load failure to the user
        st.error(f"Error loading model from Hugging Face: {e}")
        return None


summarizer = load_pipeline()
25
+
26
+ # 3. PREPROCESSING FUNCTION (Corrected for your Date/Time format)
27
def clean_whatsapp_log(text):
    """Strip timestamps and system lines from an exported WhatsApp chat.

    Target format: "24/12/25, 09:38 - Name: Message".  A 12-hour variant such
    as "12/24/25, 9:38 PM - Name: Message" is accepted as well.

    Args:
        text: Full contents of the exported chat file.

    Returns:
        The remaining lines joined with "\n".  Lines containing
        "<Media omitted>" or the end-to-end-encryption notice are dropped,
        as are lines that are empty after cleaning.
    """
    # Regex breakdown:
    #   ^[\s\ufeff\u200e\u200f]*   -> start of line, optional whitespace / BOM /
    #                                 direction marks before the date
    #   \d{1,2}/\d{1,2}/\d{2,4}    -> Date (e.g., 24/12/25)
    #   ,\s                        -> Comma and space
    #   \d{1,2}:\d{2}              -> Time (e.g., 09:38 or 20:43)
    #   (?:\s?[AaPp]\.?[Mm]\.?)?   -> Optional AM/PM marker
    #   \s-\s                      -> " - " separator
    # Anchoring at the line start keeps timestamp-like text that appears
    # inside a message body from being deleted.
    timestamp_prefix = re.compile(
        r'^[\s\ufeff\u200e\u200f]*'
        r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}'
        r'(?:\s?[AaPp]\.?[Mm]\.?)?\s-\s'
    )
    system_markers = (
        "<Media omitted>",
        "Messages and calls are end-to-end encrypted",
    )

    clean_lines = []
    for line in text.split('\n'):
        # Filter system messages
        if any(marker in line for marker in system_markers):
            continue

        # Remove the leading timestamp (at most one per line)
        cleaned_line = timestamp_prefix.sub('', line, count=1).strip()

        # Only add if text remains
        if cleaned_line:
            clean_lines.append(cleaned_line)

    return "\n".join(clean_lines)
55
+
56
+ # 4. CHUNKING FUNCTION (To handle long chats)
57
def chunk_text(text, max_chars=2000):
    """Group the lines of ``text`` into chunks of roughly ``max_chars`` characters.

    Lines are never split: a line is added to the current chunk while the
    chunk's length plus the line's length stays below ``max_chars``;
    otherwise the current chunk is closed and the line starts a new one.
    A single line longer than ``max_chars`` therefore becomes its own
    oversized chunk.

    Args:
        text: Text to split; lines are separated by "\n".
        max_chars: Soft upper bound on the size of a chunk.

    Returns:
        A list of non-empty chunks.  Every line inside a chunk is terminated
        with "\n".
    """
    chunks = []
    pending = []      # lines of the chunk under construction, each ending in "\n"
    pending_len = 0   # total length of the strings in `pending`

    for line in text.split('\n'):
        # Only close a chunk that actually holds something; otherwise an
        # oversized first line would produce an empty leading chunk.
        if pending and pending_len + len(line) >= max_chars:
            chunks.append("".join(pending))
            pending = []
            pending_len = 0
        pending.append(line + "\n")
        pending_len += len(line) + 1

    if pending:
        chunks.append("".join(pending))
    return chunks
69
+
70
+ # 5. MAIN UI
71
st.title("📱 Real-Time WhatsApp Summarizer")
st.markdown(f"**Model:** `AishaniS/text_summarizer` | **Status:** {'✅ Loaded' if summarizer else '❌ Error'}")
st.markdown("Upload your exported `_chat.txt` file to analyze conversation.")

uploaded_file = st.file_uploader("Choose a file", type=['txt'])

# Nothing below runs until a file is uploaded AND the model loaded successfully.
if uploaded_file and summarizer:
    # "utf-8-sig" drops a leading byte-order mark when present, and
    # errors="replace" keeps a file with stray non-UTF-8 bytes from crashing the app.
    raw_text = uploaded_file.getvalue().decode("utf-8-sig", errors="replace")

    # Preprocess
    clean_text = clean_whatsapp_log(raw_text)

    # Layout: Two columns
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("📜 Processed Chat")
        st.text_area("Cleaned Input", clean_text, height=400)

    with col2:
        st.subheader("🤖 AI Summary")
        if st.button("Generate Summary"):
            if not clean_text:
                st.warning("Chat is empty after cleaning. Check the file format.")
            else:
                max_chunks = 3  # Summarize only the first few chunks to keep it fast

                with st.spinner("Analyzing..."):
                    # perf_counter is a monotonic clock meant for measuring durations
                    start_time = time.perf_counter()  # Latency Timer Start

                    # Generate
                    chunks = chunk_text(clean_text)
                    summary_parts = []

                    for i, chunk in enumerate(chunks[:max_chunks]):
                        try:
                            res = summarizer(chunk, max_length=128, min_length=30, do_sample=False)
                            summary_parts.append(res[0]['summary_text'])
                        except Exception as e:
                            # Best effort: one failing chunk must not discard the others
                            st.warning(f"Could not summarize chunk {i+1}: {e}")

                    final_summary = " ".join(summary_parts)

                    latency = time.perf_counter() - start_time  # Latency Timer End

                    if final_summary:
                        st.success(final_summary)
                    else:
                        # Every chunk failed; do not show an empty green success box
                        st.error("No summary could be generated for this chat.")

                    if len(chunks) > max_chunks:
                        st.info(
                            f"Only the first {max_chunks} of {len(chunks)} sections "
                            "of the chat were summarized."
                        )
                    st.info(f"⏱️ Model Latency: {latency:.2f} seconds")