# spf_sentiment / src /streamlit_app.py
# Arjon07CSE's picture
# Update src/streamlit_app.py
# 33c09e3 verified
import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import json
import plotly.express as px
import re
# --- CONFIG & SETUP ---
# Page-level settings: browser tab title, tab icon, and a full-width layout.
st.set_page_config(layout="wide", page_title="BD Political Sentinel AI", page_icon="🇧🇩")
# --- ADVANCED KEYWORD DATABASE (Tuned for your CSV Data) ---
# One row per affiliation: (name, supportive keywords, rival/attack keywords).
# Each keyword field is a single comma-separated string that is interpolated
# verbatim into the LLM prompt.
_CONTEXT_ROWS = (
    (
        "BNP",
        "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy, BNP, 71 chetona",
        "নৌকা, ভোট চোর, হাসিনা, লীগ, চাঁদাবাজ, চান্দা, দুর্নীতি, terrorist, arson",
    ),
    (
        "Awami League",
        "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat, development, 71 er chetona",
        "ধানের শীষ, চোর, বিএনপি, জামায়াত, rajakar, killer, dictator, fascist",
    ),
    (
        "Jamaat-e-Islami",
        "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul, Jammat, Shibir, Islamic",
        "নাস্তিক, লীগ, শাহবাগ, rajakar, war criminal, terrorist, jongi",
    ),
    (
        "General/Interim Govt",
        "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ, Yunus, Student Power",
        "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা, anarchy, instability",
    ),
)

# Mapping: affiliation name -> {"keywords": ..., "rival_keywords": ...},
# in the same order as the rows above (this order drives the UI select box).
POLITICAL_CONTEXT = {
    affiliation: {"keywords": supportive, "rival_keywords": rivals}
    for affiliation, supportive, rivals in _CONTEXT_ROWS
}
# --- MODEL LOADER ---
@st.cache_resource
def _build_pipeline(model_id):
    """Build the text-generation pipeline for *model_id* (cached by Streamlit).

    Any loading error propagates to the caller. Because this function raises
    instead of returning a failure marker, a failed attempt leaves nothing in
    the resource cache and the next script run tries again.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Auto-detect device: use float32 for CPU stability, float16 for GPU speed
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=dtype,
        device_map="auto",
    )
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.2,  # Low temp = Logic focused
        top_p=0.9,
    )


def load_model():
    """Return the shared text-generation pipeline, or None if loading failed.

    The pipeline itself is cached by ``_build_pipeline``. A failure is logged
    with its traceback (so the cause is visible in the server logs) and
    reported to the caller as ``None``; it is not cached.
    """
    import logging

    # Using the Llama-3.2-3B model which fits on Free Tier (CPU) or GPU
    model_id = "hishab/titulm-llama-3.2-3b-v2.0"
    try:
        return _build_pipeline(model_id)
    except Exception:
        # Top-level boundary: the UI only needs to know that loading failed,
        # but the reason must reach the logs instead of being discarded.
        logging.getLogger(__name__).exception("Failed to load model %s", model_id)
        return None
# Sidebar Status
# Shows the compute device, loads the model, and halts the script when the
# model is unavailable. `llm` is used by the analysis section further down.
with st.sidebar:
    st.title("⚙️ System Status")

    gpu_available = torch.cuda.is_available()
    if not gpu_available:
        st.warning("🟠 CPU Mode (Standard Speed)")
    else:
        st.success("🟢 GPU Active (Fast Mode)")

    with st.spinner("Initializing AI Engine..."):
        llm = load_model()

    if llm:
        st.success("✅ AI Brain Ready")
    else:
        st.error("❌ Model Failed to Load. Check HuggingFace Logs.")
        st.stop()
# --- HELPER FUNCTIONS ---
def clean_json_output(text):
    """Extract the last valid JSON object embedded in the LLM's output.

    The text is scanned for every ``{`` and a real JSON decode is attempted
    from that position, so nested objects and ``}`` characters inside string
    values are handled correctly. Brace-delimited fragments that are not
    valid JSON are skipped rather than aborting the whole extraction.

    Args:
        text: Raw model output, possibly with chatter around the JSON.

    Returns:
        The last top-level JSON object found, as a dict, or None when *text*
        is not a string or contains no valid JSON object.
    """
    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    last_object = None
    start = text.find("{")
    while start != -1:
        try:
            candidate, end = decoder.raw_decode(text, start)
        except ValueError:
            # Not valid JSON from this brace; try the next opening brace.
            start = text.find("{", start + 1)
            continue
        if isinstance(candidate, dict):
            last_object = candidate
        # Resume after the decoded object so nested objects are not
        # re-reported as separate results.
        start = text.find("{", end)
    return last_object
def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
    """Build the two-message chat prompt for classifying one comment.

    Args:
        comment_text: The comment to classify.
        target: Name of the candidate/party the sentiment is measured towards.
        party: Political affiliation used in the classification rules.
        keywords: Comma-separated phrases that count as support.
        rival_keywords: Comma-separated phrases that count as attacks.

    Returns:
        A list with a system message (rules, examples, JSON response format)
        followed by a user message carrying the comment.
    """
    system_prompt = f"""You are an Expert Bangla Sentiment Analyzer.
Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
CRITICAL RULES:
1. Support for {party} or '{keywords}' = POSITIVE.
2. Attacks on {party}, calling them '{rival_keywords}' = NEGATIVE.
3. Support for RIVAL parties = NEGATIVE.
4. Mixed: "Hate X, Love {party}" = POSITIVE. "Love X, Hate {party}" = NEGATIVE.
Examples:
- Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: BNP) -> POSITIVE (Loves BNP)
- Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: Jamaat) -> NEGATIVE (Hates Jamaat)
- Input: "Chadabaz BNP" (Target: BNP) -> NEGATIVE
Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short explanation"}}
"""
    user_prompt = f"Comment: {comment_text}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return messages
# --- MAIN UI ---
st.title("🇧🇩 Smart Political Sentiment Analyzer")
st.markdown("Context-Aware Analysis for Bangla & Banglish Comments")

# 1. SETUP CONTEXT
# Two side-by-side inputs: the free-text target name and the affiliation whose
# keyword lists drive the prompt rules.
st.subheader("1. Analysis Configuration")
col1, col2 = st.columns(2)
target_entity = col1.text_input("Target Candidate/Party Name", "BNP")
party_context = col2.selectbox("Political Affiliation (Logic Mapping)", list(POLITICAL_CONTEXT.keys()))

# Keyword lists for the chosen affiliation, echoed back so the user can see
# what the model is being told.
party_profile = POLITICAL_CONTEXT[party_context]
selected_keywords = party_profile["keywords"]
selected_rivals = party_profile["rival_keywords"]
st.info(f"**AI Logic:** Detecting Support for *{target_entity}* using keywords: [{selected_keywords}] and flagging attacks like: [{selected_rivals}]")
# 2. UPLOAD DATA
def _favourability_score(pos_count, neg_count):
    """Return POSITIVE as a percentage of decided (POSITIVE + NEGATIVE) comments.

    Returns 0.0 when there are no decided comments. The zero case is handled
    explicitly instead of padding the denominator, because padding pulls
    every score down (1 positive and 0 negative would read as 50%).
    """
    decided = pos_count + neg_count
    return (pos_count / decided) * 100 if decided else 0.0


def _render_results(res_df, date_col):
    """Render charts, the favourability metric, the results table and the CSV download.

    Args:
        res_df: Non-empty DataFrame with "Date", "Comment", "Sentiment" and
            "Reasoning" columns. Its "Date" column is converted to datetime
            in place when *date_col* is set.
        date_col: Name of the source date column, or None when no dates are
            available (the timeline chart is then skipped).
    """
    st.divider()
    st.header("📊 Analysis Results")
    color_map = {"POSITIVE": "#00CC96", "NEGATIVE": "#EF553B", "NEUTRAL": "#636EFA", "ERROR": "grey"}

    # Layout: Pie Chart + Time Series
    row1_1, row1_2 = st.columns([1, 2])
    with row1_1:
        fig_pie = px.pie(res_df, names="Sentiment", title="Overall Sentiment", color="Sentiment", color_discrete_map=color_map)
        st.plotly_chart(fig_pie, use_container_width=True)
        # Sentiment Score Calculation
        pos_count = int((res_df["Sentiment"] == "POSITIVE").sum())
        neg_count = int((res_df["Sentiment"] == "NEGATIVE").sum())
        favourability = _favourability_score(pos_count, neg_count)
        st.metric("Favourability Score", f"{favourability:.1f}%")
    with row1_2:
        if date_col:
            try:
                # Convert Date and Aggregate (unparseable dates become NaT)
                res_df["Date"] = pd.to_datetime(res_df["Date"], errors="coerce")
                time_df = res_df.groupby([pd.Grouper(key="Date", freq="D"), "Sentiment"]).size().reset_index(name="Count")
                fig_line = px.line(time_df, x="Date", y="Count", color="Sentiment",
                                   title="Sentiment Trends Over Time",
                                   color_discrete_map=color_map, markers=True)
                st.plotly_chart(fig_line, use_container_width=True)
            except Exception:
                # The timeline is optional; keep the rest of the report usable.
                st.warning("Could not create timeline chart (Date format issue).")

    # Data Table & Download
    # NOTE(review): clicking the download button presumably reruns the script,
    # in which case the results vanish until the analysis is started again;
    # keeping res_df in st.session_state would avoid that - confirm and follow up.
    st.dataframe(res_df)
    csv = res_df.to_csv(index=False).encode("utf-8")
    st.download_button("📥 Download Analysis Report", csv, "political_sentiment_report.csv", "text/csv")


st.subheader("2. Upload Data")
uploaded_file = st.file_uploader("Upload CSV File (Must have 'Comment' column)", type=["csv"])
if uploaded_file:
    # Only the parse itself is reported as a CSV problem; failures later in
    # the analysis get their own message instead of being mislabelled.
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Error reading CSV: {e}")
        df = None

    if df is not None:
        try:
            st.success(f"Loaded {len(df)} comments successfully!")
            # Data Cleanup & Preview
            st.dataframe(df.head(3))

            # Column Auto-Detection: first column whose name contains
            # "comment" / "date"; comments fall back to the first column.
            cols = df.columns.tolist()
            comment_col = next((c for c in cols if 'comment' in c.lower()), cols[0])
            date_col = next((c for c in cols if 'date' in c.lower()), None)
            col_sel1, col_sel2 = st.columns(2)
            with col_sel1:
                comment_col = st.selectbox("Select Comment Column", cols, index=cols.index(comment_col))
            with col_sel2:
                if date_col:
                    date_col = st.selectbox("Select Date Column (Optional)", cols, index=cols.index(date_col))
                else:
                    st.write("No Date column detected.")

            # 3. RUN ANALYSIS
            if st.button("🚀 Start AI Analysis", type="primary"):
                results = []
                progress_bar = st.progress(0)
                status_text = st.empty()
                total = len(df)
                # Count rows by position: the DataFrame index label is not
                # guaranteed to be a 0-based integer.
                for position, (_, row) in enumerate(df.iterrows(), start=1):
                    text = str(row[comment_col])
                    # Basic filtering (missing cells stringify to "nan")
                    if len(text) < 2 or text.lower() == "nan":
                        # Skipped rows still advance the bar so it can reach 100%.
                        progress_bar.progress(position / total)
                        continue

                    # Construct Prompt
                    prompt = generate_comment_prompt(text, target_entity, party_context, selected_keywords, selected_rivals)

                    # Run Inference; one bad row must not abort the whole batch.
                    try:
                        out = llm(prompt)
                        raw_res = out[0]['generated_text'][-1]['content']
                        data = clean_json_output(raw_res)
                        if data:
                            # Normalise case/whitespace so "Positive" is
                            # counted and coloured like "POSITIVE".
                            label = str(data.get("label", "NEUTRAL")).strip().upper()
                            reason = data.get("reasoning", "Parse Error")
                        else:
                            label = "ERROR"
                            reason = raw_res
                    except Exception as e:
                        label = "ERROR"
                        reason = str(e)

                    # Store Result
                    results.append({
                        "Date": row[date_col] if date_col else None,
                        "Comment": text,
                        "Sentiment": label,
                        "Reasoning": reason,
                    })

                    # Update UI
                    progress_bar.progress(position / total)
                    status_text.text(f"Processing {position}/{total}: {label}")

                # 4. VISUALIZATION
                if results:
                    _render_results(pd.DataFrame(results), date_col)
                else:
                    # An empty frame has no "Sentiment" column to chart.
                    st.warning("No analyzable comments were found in the selected column.")
        except Exception as e:
            st.error(f"Error during analysis: {e}")