|
|
import streamlit as st |
|
|
import time |
|
|
import re |
|
|
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
|
|
|
|
|
# Configure the browser tab title/icon and use the full-width layout.
# NOTE(review): the page_icon glyph looks like mojibake (probably an emoji
# that was mis-decoded) — confirm against the original source.
st.set_page_config(page_title="WhatsApp Chat Analyzer", page_icon="π±", layout="wide")
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_pipeline():
    """Build the summarization pipeline once; Streamlit caches the result.

    Downloads the tokenizer and model for ``AishaniS/text_summarizer`` from
    the Hugging Face Hub and wraps them in a ``summarization`` pipeline.

    Returns:
        The ready pipeline, or ``None`` if loading failed (the error is
        shown in the UI via ``st.error`` so the page can degrade gracefully).
    """
    repo = "AishaniS/text_summarizer"
    try:
        tok = AutoTokenizer.from_pretrained(repo)
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(repo)
        return pipeline("summarization", model=seq2seq, tokenizer=tok)
    except Exception as e:
        # Network/auth/model errors are surfaced to the user, not raised.
        st.error(f"Error loading model from Hugging Face: {e}")
        return None
|
|
|
|
|
# Shared pipeline instance (cached across reruns by @st.cache_resource).
# None means loading failed; the UI below checks this before summarizing.
summarizer = load_pipeline()
|
|
|
|
|
|
|
|
def clean_whatsapp_log(text):
    """
    Strip WhatsApp export metadata, keeping only "Name: Message" lines.

    Target format: "24/12/25, 09:38 - Name: Message" (Android export).
    The timestamp match is generalized — backward-compatibly — to also
    accept optional seconds ("09:38:12") and an optional am/pm marker
    ("9:38 pm"), which some export variants use.

    System noise (media placeholders, the end-to-end-encryption notice)
    is dropped entirely. Continuation lines of multi-line messages carry
    no timestamp prefix and are kept as-is.

    Args:
        text: Raw exported chat transcript.

    Returns:
        The cleaned lines joined with newlines ("" if nothing remains).
    """
    # Timestamp prefix: d/m/yy(yy), comma, h:mm[:ss][ am/pm], " - ".
    # \s also matches the narrow no-break space some exports put before am/pm.
    stamp = re.compile(
        r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}(?::\d{2})?(?:\s?[APap][Mm])?\s-\s'
    )

    clean_lines = []
    for line in text.split('\n'):
        # Skip system lines that carry no conversational content.
        if "<Media omitted>" in line or "Messages and calls are end-to-end encrypted" in line:
            continue

        cleaned_line = stamp.sub('', line).strip()
        if cleaned_line:
            clean_lines.append(cleaned_line)

    return "\n".join(clean_lines)
|
|
|
|
|
|
|
|
def chunk_text(text, max_chars=2000):
    """
    Greedily split text into chunks of at most ~max_chars characters,
    breaking only at line boundaries (each line keeps its trailing "\\n").

    A single line longer than max_chars still becomes its own oversized
    chunk — but no longer produces a spurious empty chunk before it
    (previously an empty ``current_chunk`` was appended, and the empty
    string was then fed to the summarizer).

    Args:
        text: Newline-separated input text.
        max_chars: Soft upper bound on chunk length.

    Returns:
        List of non-empty chunk strings, each ending with a newline.
    """
    chunks = []
    current_chunk = ""
    for line in text.split('\n'):
        # +1 accounts for the newline appended with the line; this is
        # equivalent to the original `len(chunk) + len(line) < max_chars`.
        if len(current_chunk) + len(line) + 1 <= max_chars:
            current_chunk += line + "\n"
        else:
            if current_chunk:  # BUG FIX: never emit an empty chunk
                chunks.append(current_chunk)
            current_chunk = line + "\n"
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
|
|
|
|
|
|
|
|
# ---- Page header and file-upload widget (top-level Streamlit script) ----
st.title("π± Real-Time WhatsApp Summarizer")

# BUG FIX: this f-string literal was split across two physical source lines,
# which is a SyntaxError for a single-quoted f-string; rejoined onto one line.
# NOTE(review): the 'β' glyphs look like mojibake (likely check/cross emoji)
# — confirm against the original source before changing the text itself.
st.markdown(f"**Model:** `AishaniS/text_summarizer` | **Status:** {'β Loaded' if summarizer else 'β Error'}")

st.markdown("Upload your exported `_chat.txt` file to analyze conversation.")

uploaded_file = st.file_uploader("Choose a file", type=['txt'])
|
|
|
|
|
# ---- Main analysis UI: runs only when a file is uploaded AND the model
# ---- loaded successfully (summarizer is None on load failure).
if uploaded_file and summarizer:
    # ROBUSTNESS FIX: decode defensively — WhatsApp exports are normally
    # UTF-8, but a single malformed byte previously crashed the app with
    # UnicodeDecodeError; bad bytes are now replaced with U+FFFD instead.
    raw_text = uploaded_file.getvalue().decode("utf-8", errors="replace")

    clean_text = clean_whatsapp_log(raw_text)

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("π Processed Chat")
        st.text_area("Cleaned Input", clean_text, height=400)

    with col2:
        st.subheader("π€ AI Summary")
        if st.button("Generate Summary"):
            if not clean_text:
                st.warning("Chat is empty after cleaning. Check the file format.")
            else:
                with st.spinner("Analyzing..."):
                    start_time = time.time()

                    chunks = chunk_text(clean_text)

                    # Only the first few chunks are summarized to bound
                    # latency; tell the user instead of truncating silently.
                    max_chunks = 3
                    if len(chunks) > max_chunks:
                        st.info(f"Long chat: summarizing the first {max_chunks} of {len(chunks)} sections.")

                    summary_parts = []
                    for i, chunk in enumerate(chunks[:max_chunks]):
                        try:
                            res = summarizer(chunk, max_length=128, min_length=30, do_sample=False)
                            summary_parts.append(res[0]['summary_text'])
                        except Exception as e:
                            # Best-effort: one failed chunk is reported, not fatal.
                            st.warning(f"Could not summarize chunk {i+1}: {e}")

                    final_summary = " ".join(summary_parts)
                    latency = time.time() - start_time

                    # Don't render an empty green success box when every
                    # chunk failed — report the failure explicitly.
                    if summary_parts:
                        st.success(final_summary)
                    else:
                        st.error("No summary could be generated.")
                    st.info(f"β±οΈ Model Latency: {latency:.2f} seconds")