# Source: Hugging Face Space "Arjon07CSE/spf_sentiment"
# Space status at capture time: Sleeping
# File size: 10,130 bytes

import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import json
import plotly.express as px
import re

# --- CONFIG & SETUP ---
# Page-level settings: browser-tab title, icon, and a full-width layout.
_page_settings = {
    "page_title": "BD Political Sentinel AI",
    "page_icon": "🇧🇩",
    "layout": "wide",
}
st.set_page_config(**_page_settings)

# --- ADVANCED KEYWORD DATABASE (Tuned for your CSV Data) ---
# Per-affiliation keyword lists that are interpolated into the LLM prompt.
# "keywords" are phrases treated as support for the affiliation;
# "rival_keywords" are phrases treated as attacks on it. Each value is a single
# comma-separated string because it is pasted into the prompt verbatim.
POLITICAL_CONTEXT = {
    "BNP": dict(
        keywords="ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy, BNP, 71 chetona",
        rival_keywords="নৌকা, ভোট চোর, হাসিনা, লীগ, চাঁদাবাজ, চান্দা, দুর্নীতি, terrorist, arson",
    ),
    "Awami League": dict(
        keywords="নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat, development, 71 er chetona",
        rival_keywords="ধানের শীষ, চোর, বিএনপি, জামায়াত, rajakar, killer, dictator, fascist",
    ),
    "Jamaat-e-Islami": dict(
        keywords="দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul, Jammat, Shibir, Islamic",
        rival_keywords="নাস্তিক, লীগ, শাহবাগ, rajakar, war criminal, terrorist, jongi",
    ),
    "General/Interim Govt": dict(
        keywords="ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ, Yunus, Student Power",
        rival_keywords="স্বৈরাচার, ফ্যাসিস্ট, হাসিনা, anarchy, instability",
    ),
}

# --- MODEL LOADER ---
@st.cache_resource
def load_model():
    """Build the text-generation pipeline used for sentiment classification.

    Loads the tokenizer and causal-LM weights for
    ``hishab/titulm-llama-3.2-3b-v2.0`` and wraps them in a ``transformers``
    text-generation pipeline (150 new tokens max, sampling with
    temperature 0.2 and top_p 0.9).

    Returns:
        The configured pipeline, or ``None`` when any loading step raised.
        The failure is logged with its traceback so the cause is visible in
        the application logs instead of being silently discarded.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    # Using the Llama-3.2-3B model which fits on Free Tier (CPU) or GPU
    model_id = "hishab/titulm-llama-3.2-3b-v2.0"
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Auto-detect device: use float32 for CPU stability, float16 for GPU speed
        dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map="auto"
        )

        return pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.2,  # Low temp = Logic focused
            top_p=0.9
        )
    except Exception:
        # Callers rely on a falsy return to show a friendly error, so keep
        # returning None -- but record why loading failed.
        # NOTE(review): st.cache_resource presumably caches this None as well,
        # so a transient failure may persist until the cache is cleared -- confirm.
        logging.getLogger(__name__).exception("Failed to load model %s", model_id)
        return None

# Sidebar: report the compute device, load the model, and halt the app if loading failed.
with st.sidebar:
    st.title("⚙️ System Status")

    # Tell the user whether inference will run on GPU or CPU.
    if not torch.cuda.is_available():
        st.warning("🟠 CPU Mode (Standard Speed)")
    else:
        st.success("🟢 GPU Active (Fast Mode)")

    # load_model() returns None on failure; the spinner covers the load time.
    with st.spinner("Initializing AI Engine..."):
        llm = load_model()

    if llm:
        st.success("✅ AI Brain Ready")
    else:
        st.error("❌ Model Failed to Load. Check HuggingFace Logs.")
        st.stop()

# --- HELPER FUNCTIONS ---
def clean_json_output(text):
    """Extract the last valid JSON object embedded in the LLM's output.

    The text is scanned for every ``{`` and a real JSON parse is attempted
    from that position. Unlike a brace-matching regex this handles nested
    objects and braces inside string values, and an invalid trailing ``{...}``
    fragment does not hide an earlier valid object.

    Args:
        text: Raw model output, possibly with chatter around the JSON.

    Returns:
        The last JSON object (``dict``) that parses successfully, or ``None``
        if ``text`` is not a string or contains no valid JSON object.
    """
    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    last_object = None
    start = text.find("{")
    while start != -1:
        try:
            parsed, end = decoder.raw_decode(text, start)
        except ValueError:
            # Not valid JSON from this brace; try the next opening brace.
            start = text.find("{", start + 1)
            continue
        if isinstance(parsed, dict):
            last_object = parsed
        # Resume after the parsed object so nested braces are not re-scanned.
        start = text.find("{", end)
    return last_object

def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
    """Build the two-message chat prompt for classifying one comment.

    The system message carries the task description, the labelling rules
    (with ``target``, ``party``, ``keywords`` and ``rival_keywords``
    interpolated), three worked examples and the required JSON response
    format. The user message carries the comment itself.

    Returns:
        A list ``[system_message, user_message]`` of ``{"role", "content"}`` dicts.
    """
    system_message = dict(
        role="system",
        content=f"""You are an Expert Bangla Sentiment Analyzer.
        Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
        
        CRITICAL RULES:
        1. Support for {party} or '{keywords}' = POSITIVE.
        2. Attacks on {party}, calling them '{rival_keywords}' = NEGATIVE.
        3. Support for RIVAL parties = NEGATIVE.
        4. Mixed: "Hate X, Love {party}" = POSITIVE. "Love X, Hate {party}" = NEGATIVE.
        
        Examples:
        - Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: BNP) -> POSITIVE (Loves BNP)
        - Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: Jamaat) -> NEGATIVE (Hates Jamaat)
        - Input: "Chadabaz BNP" (Target: BNP) -> NEGATIVE
        
        Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short explanation"}}
        """,
    )
    user_message = dict(role="user", content=f"Comment: {comment_text}")
    return [system_message, user_message]

# --- MAIN UI ---
st.title("🇧🇩 Smart Political Sentiment Analyzer")
st.markdown("Context-Aware Analysis for Bangla & Banglish Comments")

# 1. SETUP CONTEXT
# The user names the target and picks which affiliation's keyword lists
# should drive the prompt rules.
st.subheader("1. Analysis Configuration")
col1, col2 = st.columns(2)
with col2:
    party_context = st.selectbox("Political Affiliation (Logic Mapping)", list(POLITICAL_CONTEXT))
with col1:
    target_entity = st.text_input("Target Candidate/Party Name", "BNP")

# Look up the support / attack keyword strings for the chosen affiliation.
_context_entry = POLITICAL_CONTEXT[party_context]
selected_keywords = _context_entry["keywords"]
selected_rivals = _context_entry["rival_keywords"]

_logic_summary = f"**AI Logic:** Detecting Support for *{target_entity}* using keywords: [{selected_keywords}] and flagging attacks like: [{selected_rivals}]"
st.info(_logic_summary)

# 2. UPLOAD DATA
st.subheader("2. Upload Data")
uploaded_file = st.file_uploader("Upload CSV File (Must have 'Comment' column)", type=["csv"])

# Labels the prompt asks the model to emit; anything else is reported as "ERROR".
_VALID_LABELS = ("POSITIVE", "NEGATIVE", "NEUTRAL")
# Chart colours per sentiment label, shared by the pie and the timeline.
_SENTIMENT_COLORS = {"POSITIVE": "#00CC96", "NEGATIVE": "#EF553B", "NEUTRAL": "#636EFA", "ERROR": "grey"}


def _classify_prompt(prompt):
    """Run the model on one chat prompt and return ``(label, reasoning)``.

    The label is upper-cased and validated against ``_VALID_LABELS`` so that
    e.g. "positive" still counts towards the score. Inference errors, output
    without parseable JSON, and unknown labels all yield the label "ERROR";
    the reasoning then holds the exception text or the raw model output.
    """
    try:
        out = llm(prompt)
        raw_res = out[0]['generated_text'][-1]['content']
    except Exception as e:
        return "ERROR", str(e)

    data = clean_json_output(raw_res)
    if not data:
        return "ERROR", raw_res

    label = str(data.get("label", "NEUTRAL")).strip().upper()
    if label not in _VALID_LABELS:
        return "ERROR", raw_res
    return label, data.get("reasoning", "Parse Error")


def _favourability_text(res_df):
    """Return positive / (positive + negative) as a percentage string.

    NEUTRAL and ERROR rows are excluded. Returns "N/A" when there are no
    positive or negative rows, rather than padding the denominator (which
    would skew every score).
    """
    pos_count = int((res_df['Sentiment'] == 'POSITIVE').sum())
    neg_count = int((res_df['Sentiment'] == 'NEGATIVE').sum())
    total_valid = pos_count + neg_count
    if total_valid == 0:
        return "N/A"
    return f"{(pos_count / total_valid) * 100:.1f}%"


def _render_results(res_df, date_col):
    """Draw the pie chart, favourability metric, optional timeline, table and download."""
    st.divider()
    st.header("📊 Analysis Results")

    # Layout: Pie Chart + Time Series
    row1_1, row1_2 = st.columns([1, 2])

    with row1_1:
        fig_pie = px.pie(res_df, names="Sentiment", title="Overall Sentiment", color="Sentiment", color_discrete_map=_SENTIMENT_COLORS)
        st.plotly_chart(fig_pie, use_container_width=True)
        st.metric("Favourability Score", _favourability_text(res_df))

    with row1_2:
        if date_col:
            try:
                # Parse dates on a copy so the report keeps the original date
                # values instead of NaT for anything unparseable.
                dated = res_df.assign(Date=pd.to_datetime(res_df['Date'], errors='coerce'))
                time_df = dated.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().reset_index(name='Count')

                fig_line = px.line(time_df, x='Date', y='Count', color='Sentiment',
                                   title="Sentiment Trends Over Time",
                                   color_discrete_map=_SENTIMENT_COLORS, markers=True)
                st.plotly_chart(fig_line, use_container_width=True)
            except Exception:
                st.warning("Could not create timeline chart (Date format issue).")

    # Data Table & Download
    # NOTE(review): Streamlit reruns the script on widget interaction, so these
    # results presumably disappear once the download button is clicked --
    # consider persisting them in st.session_state.
    st.dataframe(res_df)
    csv = res_df.to_csv(index=False).encode('utf-8')
    st.download_button("📥 Download Analysis Report", csv, "political_sentiment_report.csv", "text/csv")


if uploaded_file:
    # Only the parse step is reported as a CSV error; later failures get their
    # own message so they are not mislabelled.
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        df = None
        st.error(f"Error reading CSV: {e}")

    if df is not None:
        try:
            st.success(f"Loaded {len(df)} comments successfully!")

            # Data Cleanup & Preview
            st.dataframe(df.head(3))

            # Column Auto-Detection: first column whose name contains
            # "comment" / "date"; fall back to the first column for comments.
            cols = df.columns.tolist()
            comment_col = next((c for c in cols if 'comment' in c.lower()), cols[0])
            date_col = next((c for c in cols if 'date' in c.lower()), None)

            col_sel1, col_sel2 = st.columns(2)
            with col_sel1:
                comment_col = st.selectbox("Select Comment Column", cols, index=cols.index(comment_col))
            with col_sel2:
                if date_col:
                    date_col = st.selectbox("Select Date Column (Optional)", cols, index=cols.index(date_col))
                else:
                    st.write("No Date column detected.")

            # 3. RUN ANALYSIS
            if st.button("🚀 Start AI Analysis", type="primary"):
                results = []
                progress_bar = st.progress(0)
                status_text = st.empty()

                total = len(df)

                # Count rows by position rather than by index label so the
                # progress fraction is correct for any index and still
                # advances on skipped rows.
                for position, (_, row) in enumerate(df.iterrows(), start=1):
                    raw_value = row[comment_col]
                    text = "" if pd.isna(raw_value) else str(raw_value)

                    # Basic filtering: skip missing and near-empty comments.
                    if len(text) >= 2:
                        prompt = generate_comment_prompt(text, target_entity, party_context, selected_keywords, selected_rivals)
                        label, reason = _classify_prompt(prompt)

                        results.append({
                            "Date": row[date_col] if date_col else None,
                            "Comment": text,
                            "Sentiment": label,
                            "Reasoning": reason
                        })
                        status_text.text(f"Processing {position}/{total}: {label}")

                    progress_bar.progress(position / total)

                # 4. VISUALIZATION
                if results:
                    _render_results(pd.DataFrame(results), date_col)
                else:
                    # An empty frame has no 'Sentiment' column to chart.
                    st.warning("No analyzable comments were found in the selected column.")

        except Exception as e:
            st.error(f"Analysis failed: {e}")