"""BD Political Sentinel AI.

Streamlit app that classifies Bangla / Banglish political comments from a
CSV as POSITIVE / NEGATIVE / NEUTRAL toward a chosen party, using a local
Llama-3.2-3B chat model plus hand-tuned per-party keyword context.
"""

import json
import re

import pandas as pd
import plotly.express as px
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# --- CONFIG & SETUP ---
st.set_page_config(
    page_title="BD Political Sentinel AI",
    page_icon="🇧🇩",
    layout="wide",
)

# --- ADVANCED KEYWORD DATABASE (Tuned for your CSV Data) ---
# Per-party cue phrases: "keywords" signal support, "rival_keywords" signal
# attacks. Both strings are interpolated verbatim into the LLM prompt.
POLITICAL_CONTEXT = {
    "BNP": {
        "keywords": "āϧāĻžāύ⧇āϰ āĻļā§€āώ, āϜāĻŋāĻ¨ā§āĻĻāĻžāĻŦāĻžāĻĻ, āϜāĻŋ⧟āĻžāϰ āϏ⧈āύāĻŋāĻ•, āĻĻ⧇āĻļāύ⧇āĻ¤ā§āϰ⧀, āϤāĻžāϰ⧇āĻ•, Sheaf of Paddy, BNP, 71 chetona",
        "rival_keywords": "āύ⧌āĻ•āĻž, āĻ­ā§‹āϟ āĻšā§‹āϰ, āĻšāĻžāϏāĻŋāύāĻž, āϞ⧀āĻ—, āϚāĻžāρāĻĻāĻžāĻŦāĻžāϜ, āϚāĻžāĻ¨ā§āĻĻāĻž, āĻĻ⧁āĻ°ā§āύ⧀āϤāĻŋ, terrorist, arson",
    },
    "Awami League": {
        "keywords": "āύ⧌āĻ•āĻž, āϜ⧟ āĻŦāĻžāĻ‚āϞāĻž, āĻŽā§āϜāĻŋāĻŦ, āĻšāĻžāϏāĻŋāύāĻž, āĻļ⧇āϖ⧇āϰ āĻŦ⧇āϟāĻŋ, Boat, development, 71 er chetona",
        "rival_keywords": "āϧāĻžāύ⧇āϰ āĻļā§€āώ, āĻšā§‹āϰ, āĻŦāĻŋāĻāύāĻĒāĻŋ, āϜāĻžāĻŽāĻžā§ŸāĻžāϤ, rajakar, killer, dictator, fascist",
    },
    "Jamaat-e-Islami": {
        "keywords": "āĻĻāĻžā§œāĻŋāĻĒāĻžāĻ˛ā§āϞāĻž, āφāĻ˛ā§āϞāĻžāĻš, āύāĻžāϰāĻžā§Ÿā§‡ āϤāĻžāĻ•āĻŦāĻŋāϰ, āĻĻā§āĻŦā§€āύ, āχāϏāϞāĻžāĻŽ, Mamunul, Jammat, Shibir, Islamic",
        "rival_keywords": "āύāĻžāĻ¸ā§āϤāĻŋāĻ•, āϞ⧀āĻ—, āĻļāĻžāĻšāĻŦāĻžāĻ—, rajakar, war criminal, terrorist, jongi",
    },
    "General/Interim Govt": {
        "keywords": "āχāωāύ⧂āϏ, āĻ›āĻžāĻ¤ā§āϰ āϏāĻŽāĻžāϜ, āϏāĻ‚āĻ¸ā§āĻ•āĻžāϰ, āĻœā§‡āύāĻžāϰ⧇āĻļāύ āĻœā§‡āĻĄ, āχāύāϏāĻžāĻĢ, Yunus, Student Power",
        "rival_keywords": "āĻ¸ā§āĻŦ⧈āϰāĻžāϚāĻžāϰ, āĻĢā§āϝāĻžāϏāĻŋāĻ¸ā§āϟ, āĻšāĻžāϏāĻŋāύāĻž, anarchy, instability",
    },
}


# --- MODEL LOADER ---
@st.cache_resource
def load_model():
    """Load the causal LM once per process and wrap it in a chat pipeline.

    Returns:
        A transformers text-generation pipeline, or None when loading fails
        (the sidebar then reports the failure and stops the app).
    """
    # Using the Llama-3.2-3B model which fits on Free Tier (CPU) or GPU
    model_id = "hishab/titulm-llama-3.2-3b-v2.0"
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Auto-detect device: use float32 for CPU stability, float16 for GPU speed
        dtype = torch.float16 if torch.cuda.is_available() else torch.float32
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map="auto",
        )
        return pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.2,  # Low temp = Logic focused
            top_p=0.9,
        )
    except Exception as exc:
        # FIX: the original swallowed the exception silently, making the
        # generic "Model Failed to Load" message unactionable. Surface it.
        st.sidebar.error(f"Model load error: {exc}")
        return None


# Sidebar Status
with st.sidebar:
    st.title("âš™ī¸ System Status")
    if torch.cuda.is_available():
        st.success("đŸŸĸ GPU Active (Fast Mode)")
    else:
        st.warning("🟠 CPU Mode (Standard Speed)")
    with st.spinner("Initializing AI Engine..."):
        llm = load_model()
    # FIX: explicit None check — pipeline objects should not be tested by
    # truthiness.
    if llm is None:
        st.error("❌ Model Failed to Load. Check HuggingFace Logs.")
        st.stop()
    else:
        st.success("✅ AI Brain Ready")


# --- HELPER FUNCTIONS ---
def clean_json_output(text):
    """Robustly extract the last JSON object from the LLM's chatter.

    The model often echoes the prompt's example objects before its answer,
    so the LAST brace-delimited match is taken.

    Returns:
        The parsed dict, or None when no parseable JSON is present.
    """
    try:
        # Find the last JSON-like structure. NOTE: the non-greedy pattern
        # only matches flat (non-nested) objects, which is all the prompt's
        # response format requires.
        matches = re.findall(r'\{.*?\}', text, re.DOTALL)
        if matches:
            return json.loads(matches[-1])
        return None
    except (json.JSONDecodeError, TypeError):
        # FIX: narrowed from a bare `except:`, which also caught
        # KeyboardInterrupt / SystemExit.
        return None


def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
    """Build the chat-format prompt (system + user messages) for one comment.

    Args:
        comment_text: The raw comment to classify.
        target: Display name of the analysis target (e.g. a candidate).
        party: Party key from POLITICAL_CONTEXT used for the logic rules.
        keywords: Support cue phrases for the party.
        rival_keywords: Attack / rival cue phrases for the party.

    Returns:
        A list of role/content dicts consumable by a chat text-generation
        pipeline.
    """
    return [
        {"role": "system", "content": f"""You are an Expert Bangla Sentiment Analyzer.
Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
CRITICAL RULES:
1. Support for {party} or '{keywords}' = POSITIVE.
2. Attacks on {party}, calling them '{rival_keywords}' = NEGATIVE.
3. Support for RIVAL parties = NEGATIVE.
4. Mixed: "Hate X, Love {party}" = POSITIVE. "Love X, Hate {party}" = NEGATIVE.
Examples:
- Input: "Jammat shibir boycott ❌ Bnp đŸĨ°" (Target: BNP) -> POSITIVE (Loves BNP)
- Input: "Jammat shibir boycott ❌ Bnp đŸĨ°" (Target: Jamaat) -> NEGATIVE (Hates Jamaat)
- Input: "Chadabaz BNP" (Target: BNP) -> NEGATIVE
Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short explanation"}}
"""},
        {"role": "user", "content": f"Comment: {comment_text}"},
    ]


# --- MAIN UI ---
st.title("🇧🇩 Smart Political Sentiment Analyzer")
st.markdown("Context-Aware Analysis for Bangla & Banglish Comments")

# 1. SETUP CONTEXT
st.subheader("1. Analysis Configuration")
col1, col2 = st.columns(2)
with col1:
    target_entity = st.text_input("Target Candidate/Party Name", "BNP")
with col2:
    party_context = st.selectbox("Political Affiliation (Logic Mapping)", list(POLITICAL_CONTEXT.keys()))

selected_keywords = POLITICAL_CONTEXT[party_context]["keywords"]
selected_rivals = POLITICAL_CONTEXT[party_context]["rival_keywords"]
st.info(f"**AI Logic:** Detecting Support for *{target_entity}* using keywords: [{selected_keywords}] and flagging attacks like: [{selected_rivals}]")

# 2. UPLOAD DATA
st.subheader("2. Upload Data")
uploaded_file = st.file_uploader("Upload CSV File (Must have 'Comment' column)", type=["csv"])

if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
        st.success(f"Loaded {len(df)} comments successfully!")

        # Data Cleanup & Preview
        st.dataframe(df.head(3))

        # Column Auto-Detection (first column containing "comment"/"date",
        # falling back to the first column for comments).
        cols = df.columns.tolist()
        comment_col = next((c for c in cols if 'comment' in c.lower()), cols[0])
        date_col = next((c for c in cols if 'date' in c.lower()), None)
        col_sel1, col_sel2 = st.columns(2)
        with col_sel1:
            comment_col = st.selectbox("Select Comment Column", cols, index=cols.index(comment_col))
        with col_sel2:
            if date_col:
                date_col = st.selectbox("Select Date Column (Optional)", cols, index=cols.index(date_col))
            else:
                st.write("No Date column detected.")

        # 3. RUN ANALYSIS
        if st.button("🚀 Start AI Analysis", type="primary"):
            results = []
            progress_bar = st.progress(0)
            status_text = st.empty()
            total = len(df)

            # FIX: iterate with an explicit 1-based position instead of
            # reusing the DataFrame index `i`. A non-RangeIndex CSV made
            # (i + 1) / total exceed 1.0 and crash st.progress, and skipped
            # rows left the bar stuck short of 100%.
            for pos, (_, row) in enumerate(df.iterrows(), start=1):
                text = str(row[comment_col])
                label = None

                # Basic filtering: drop empty / NaN comments
                if len(text) >= 2 and text.lower() != "nan":
                    # Construct Prompt
                    prompt = generate_comment_prompt(text, target_entity, party_context, selected_keywords, selected_rivals)
                    # Run Inference
                    try:
                        out = llm(prompt)
                        # Chat pipelines return the whole conversation; the
                        # last message is the assistant's reply.
                        raw_res = out[0]['generated_text'][-1]['content']
                        data = clean_json_output(raw_res)
                        label = data.get("label", "NEUTRAL") if data else "ERROR"
                        reason = data.get("reasoning", "Parse Error") if data else raw_res
                    except Exception as e:
                        label = "ERROR"
                        reason = str(e)

                    # Store Result
                    results.append({
                        "Date": row[date_col] if date_col else None,
                        "Comment": text,
                        "Sentiment": label,
                        "Reasoning": reason,
                    })

                # Update UI (also advances on skipped rows)
                progress_bar.progress(pos / total)
                status_text.text(f"Processing {pos}/{total}: {label or 'skipped'}")

            # 4. VISUALIZATION
            res_df = pd.DataFrame(results)
            st.divider()
            st.header("📊 Analysis Results")

            if res_df.empty:
                # FIX: guard — an all-blank file previously produced an
                # empty DataFrame with no 'Sentiment' column and a KeyError
                # in the chart code below.
                st.warning("No analyzable comments found in the file.")
            else:
                # Layout: Pie Chart + Time Series
                row1_1, row1_2 = st.columns([1, 2])
                color_map = {"POSITIVE": "#00CC96", "NEGATIVE": "#EF553B", "NEUTRAL": "#636EFA", "ERROR": "grey"}
                with row1_1:
                    fig_pie = px.pie(res_df, names="Sentiment", title="Overall Sentiment", color="Sentiment", color_discrete_map=color_map)
                    st.plotly_chart(fig_pie, use_container_width=True)

                    # Sentiment Score Calculation: positives over all
                    # decided (POSITIVE + NEGATIVE) comments.
                    pos_count = len(res_df[res_df['Sentiment'] == 'POSITIVE'])
                    neg_count = len(res_df[res_df['Sentiment'] == 'NEGATIVE'])
                    # FIX: the original added +1 to the denominator, biasing
                    # every score low; clamp to 1 only to avoid div-by-zero.
                    total_valid = max(pos_count + neg_count, 1)
                    favourability = (pos_count / total_valid) * 100
                    st.metric("Favourability Score", f"{favourability:.1f}%")

                with row1_2:
                    if date_col:
                        try:
                            # Convert Date and Aggregate per calendar day
                            res_df['Date'] = pd.to_datetime(res_df['Date'], errors='coerce')
                            time_df = res_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().reset_index(name='Count')
                            fig_line = px.line(time_df, x='Date', y='Count', color='Sentiment', title="Sentiment Trends Over Time", color_discrete_map=color_map, markers=True)
                            st.plotly_chart(fig_line, use_container_width=True)
                        except Exception as e:
                            # FIX: include the error so date-format problems
                            # are debuggable instead of silently generic.
                            st.warning(f"Could not create timeline chart (Date format issue): {e}")

                # Data Table & Download
                st.dataframe(res_df)
                csv = res_df.to_csv(index=False).encode('utf-8')
                st.download_button("đŸ“Ĩ Download Analysis Report", csv, "political_sentiment_report.csv", "text/csv")
    except Exception as e:
        st.error(f"Error reading CSV: {e}")