File size: 10,130 Bytes
81354c4
8367aa6
 
 
 
 
 
 
 
 
 
 
 
 
 
33c09e3
8367aa6
 
33c09e3
 
8367aa6
 
33c09e3
 
8367aa6
 
33c09e3
 
8367aa6
 
33c09e3
 
8367aa6
 
 
 
 
 
33c09e3
8367aa6
 
 
33c09e3
8367aa6
 
 
 
 
 
 
 
 
 
 
 
33c09e3
8367aa6
33c09e3
8367aa6
 
 
 
 
 
33c09e3
8367aa6
33c09e3
8367aa6
33c09e3
8367aa6
33c09e3
8367aa6
33c09e3
8367aa6
 
 
33c09e3
8367aa6
33c09e3
 
8367aa6
 
 
 
 
33c09e3
8367aa6
 
 
33c09e3
8367aa6
 
 
 
 
 
 
 
33c09e3
 
 
 
 
8367aa6
 
33c09e3
 
 
8367aa6
33c09e3
8367aa6
 
 
 
 
 
33c09e3
8367aa6
33c09e3
 
 
 
 
 
 
8367aa6
33c09e3
 
8367aa6
33c09e3
 
 
 
 
81354c4
33c09e3
 
 
 
 
 
 
8367aa6
33c09e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8367aa6
33c09e3
 
 
 
8367aa6
33c09e3
 
8367aa6
 
33c09e3
 
 
 
8367aa6
 
33c09e3
 
8367aa6
33c09e3
 
8367aa6
 
 
 
33c09e3
 
 
 
8367aa6
33c09e3
8367aa6
33c09e3
 
 
 
8367aa6
33c09e3
 
 
 
8367aa6
33c09e3
 
8367aa6
33c09e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import json
import plotly.express as px
import re

# --- PAGE CONFIGURATION ---
# Browser-tab title/icon and a wide layout for the whole app.
st.set_page_config(layout="wide", page_title="BD Political Sentinel AI", page_icon="🇧🇩")

# --- POLITICAL KEYWORD DATABASE ---
# Maps each selectable affiliation to two comma-separated phrase lists:
#   "keywords"       -> phrases interpolated into the prompt as support markers
#   "rival_keywords" -> phrases interpolated into the prompt as attack markers
# Row order is preserved, so it is also the order the affiliations are listed in.
POLITICAL_CONTEXT = {
    affiliation: {"keywords": support_phrases, "rival_keywords": attack_phrases}
    for affiliation, support_phrases, attack_phrases in (
        (
            "BNP",
            "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy, BNP, 71 chetona",
            "নৌকা, ভোট চোর, হাসিনা, লীগ, চাঁদাবাজ, চান্দা, দুর্নীতি, terrorist, arson",
        ),
        (
            "Awami League",
            "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat, development, 71 er chetona",
            "ধানের শীষ, চোর, বিএনপি, জামায়াত, rajakar, killer, dictator, fascist",
        ),
        (
            "Jamaat-e-Islami",
            "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul, Jammat, Shibir, Islamic",
            "নাস্তিক, লীগ, শাহবাগ, rajakar, war criminal, terrorist, jongi",
        ),
        (
            "General/Interim Govt",
            "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ, Yunus, Student Power",
            "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা, anarchy, instability",
        ),
    )
}

# --- MODEL LOADER ---
@st.cache_resource
def load_model():
    """Build the text-generation pipeline used to classify comments.

    Loads the tokenizer and causal-LM weights for the configured model id and
    wraps them in a ``transformers`` text-generation pipeline with low-temperature
    sampling (150 new tokens max).

    Returns:
        The ready-to-call pipeline, or ``None`` when loading fails. Failures
        are logged with their traceback so the cause is actually available in
        the application logs instead of being silently discarded.
    """
    # Local import keeps this block self-contained; logging is stdlib.
    import logging

    # Using the Llama-3.2-3B model which fits on Free Tier (CPU) or GPU
    model_id = "hishab/titulm-llama-3.2-3b-v2.0"
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Auto-detect device: use float32 for CPU stability, float16 for GPU speed
        dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map="auto"
        )

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.2, # Low temp = Logic focused
            top_p=0.9
        )
        return pipe
    except Exception:
        # Callers rely on the None return to show a friendly error, so keep
        # that contract but record why the load failed.
        # NOTE(review): the None result is returned through st.cache_resource
        # like any other value, so a retry presumably needs a cache clear or
        # app restart -- confirm this is the desired behaviour.
        logging.getLogger(__name__).exception("Failed to load model %s", model_id)
        return None

# Sidebar: report the compute device, then load the model and report the outcome.
with st.sidebar:
    st.title("⚙️ System Status")

    # Device banner: warn on CPU, confirm on GPU.
    if not torch.cuda.is_available():
        st.warning("🟠 CPU Mode (Standard Speed)")
    else:
        st.success("🟢 GPU Active (Fast Mode)")

    # Keep a spinner visible while the (cached) loader runs.
    with st.spinner("Initializing AI Engine..."):
        llm = load_model()

    if llm:
        st.success("✅ AI Brain Ready")
    else:
        # Without a model nothing below can work, so halt the script here.
        st.error("❌ Model Failed to Load. Check HuggingFace Logs.")
        st.stop()

# --- HELPER FUNCTIONS ---
def clean_json_output(text):
    """Extract the last valid JSON object embedded in the LLM's free-form text.

    The text is scanned for ``{`` characters and a real JSON parse is attempted
    from each one. Unlike a brace-matching regex, this handles nested objects
    and string values that contain ``}``, and an invalid trailing brace span no
    longer hides an earlier valid object.

    Args:
        text: Raw model output, possibly with chatter around the JSON.

    Returns:
        The last successfully decoded JSON object as a ``dict``, or ``None``
        when *text* is not a string or contains no decodable object.
    """
    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    last_object = None
    start = text.find("{")
    while start != -1:
        try:
            candidate, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not valid JSON from this brace; try the next opening brace.
            start = text.find("{", start + 1)
            continue
        last_object = candidate
        # Resume after the decoded object so nested objects are not re-parsed.
        start = text.find("{", end)
    return last_object

def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
    """Assemble the chat messages that ask the model to classify one comment.

    Args:
        comment_text: The comment to classify.
        target: Name of the candidate/party the sentiment is measured towards.
        party: Affiliation label interpolated into the classification rules.
        keywords: Phrases quoted in the rules as markers of support.
        rival_keywords: Phrases quoted in the rules as markers of attack.

    Returns:
        A two-item list of ``{"role", "content"}`` dicts: the system
        instructions followed by the user message carrying the comment.
    """
    system_message = f"""You are an Expert Bangla Sentiment Analyzer.
        Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
        
        CRITICAL RULES:
        1. Support for {party} or '{keywords}' = POSITIVE.
        2. Attacks on {party}, calling them '{rival_keywords}' = NEGATIVE.
        3. Support for RIVAL parties = NEGATIVE.
        4. Mixed: "Hate X, Love {party}" = POSITIVE. "Love X, Hate {party}" = NEGATIVE.
        
        Examples:
        - Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: BNP) -> POSITIVE (Loves BNP)
        - Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: Jamaat) -> NEGATIVE (Hates Jamaat)
        - Input: "Chadabaz BNP" (Target: BNP) -> NEGATIVE
        
        Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short explanation"}}
        """
    user_message = f"Comment: {comment_text}"

    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": user_message}
    return [system_turn, user_turn]

# --- MAIN UI ---
st.title("🇧🇩 Smart Political Sentiment Analyzer")
st.markdown("Context-Aware Analysis for Bangla & Banglish Comments")

# 1. SETUP CONTEXT
# Two side-by-side inputs: the free-text target name and the affiliation whose
# keyword lists drive the prompt rules.
st.subheader("1. Analysis Configuration")
target_column, party_column = st.columns(2)
target_entity = target_column.text_input("Target Candidate/Party Name", "BNP")
party_context = party_column.selectbox(
    "Political Affiliation (Logic Mapping)",
    list(POLITICAL_CONTEXT),
)

# Look up the phrase lists for the chosen affiliation once.
active_context = POLITICAL_CONTEXT[party_context]
selected_keywords = active_context["keywords"]
selected_rivals = active_context["rival_keywords"]

st.info(
    f"**AI Logic:** Detecting Support for *{target_entity}* "
    f"using keywords: [{selected_keywords}] "
    f"and flagging attacks like: [{selected_rivals}]"
)

# 2. UPLOAD DATA
st.subheader("2. Upload Data")
uploaded_file = st.file_uploader(
    "Upload CSV File (Must have 'Comment' column)",
    type=["csv"],
)

def _analyze_comments(df, comment_col, date_col):
    """Classify every usable comment in *df* and return one result dict per comment.

    Reads the module-level ``llm`` pipeline and the prompt settings chosen in
    the configuration section (``target_entity``, ``party_context``,
    ``selected_keywords``, ``selected_rivals``).

    Args:
        df: DataFrame loaded from the uploaded CSV.
        comment_col: Name of the column holding the comment text.
        date_col: Name of the date column, or ``None`` when there is none.

    Returns:
        A list of dicts with the keys ``Date``, ``Comment``, ``Sentiment`` and
        ``Reasoning``. Missing cells and comments shorter than two characters
        are skipped and produce no entry.
    """
    results = []
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(df)

    # enumerate() yields a 1-based position that does not depend on the
    # DataFrame's index labels, so the progress fraction is always in (0, 1].
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        raw_value = row[comment_col]
        # Detect missing cells explicitly instead of comparing against "nan".
        text = "" if pd.isna(raw_value) else str(raw_value)

        # Basic filtering: ignore empty cells and one-character fragments.
        if len(text) >= 2:
            prompt = generate_comment_prompt(
                text, target_entity, party_context, selected_keywords, selected_rivals
            )

            # Run inference; a failure on one comment must not abort the batch.
            try:
                out = llm(prompt)
                raw_res = out[0]['generated_text'][-1]['content']
                data = clean_json_output(raw_res)

                if data:
                    # Normalise case so "Positive" is counted with "POSITIVE".
                    label = str(data.get("label", "NEUTRAL")).strip().upper()
                    reason = data.get("reasoning", "Parse Error")
                else:
                    label = "ERROR"
                    reason = raw_res
            except Exception as e:
                label = "ERROR"
                reason = str(e)

            results.append({
                "Date": row[date_col] if date_col else None,
                "Comment": text,
                "Sentiment": label,
                "Reasoning": reason
            })
            status_text.text(f"Processing {position}/{total}: {label}")

        # Advance the bar for skipped rows too, so it always reaches 100%.
        progress_bar.progress(position / total)

    return results


def _render_results(res_df, has_dates):
    """Render the charts, favourability metric, result table and download button.

    Args:
        res_df: Non-empty DataFrame built from the analysis results.
        has_dates: Whether the ``Date`` column holds values worth plotting.
    """
    st.divider()
    st.header("📊 Analysis Results")

    # Layout: Pie Chart + Time Series
    row1_1, row1_2 = st.columns([1, 2])
    color_map = {"POSITIVE": "#00CC96", "NEGATIVE": "#EF553B", "NEUTRAL": "#636EFA", "ERROR": "grey"}

    with row1_1:
        fig_pie = px.pie(res_df, names="Sentiment", title="Overall Sentiment", color="Sentiment", color_discrete_map=color_map)
        st.plotly_chart(fig_pie, use_container_width=True)

        # Favourability = share of positive among polarised (positive + negative)
        # comments. The zero case is guarded explicitly rather than by padding
        # the denominator, which skewed every score downwards.
        pos_count = int((res_df['Sentiment'] == 'POSITIVE').sum())
        neg_count = int((res_df['Sentiment'] == 'NEGATIVE').sum())
        total_valid = pos_count + neg_count
        if total_valid:
            favourability = (pos_count / total_valid) * 100
            st.metric("Favourability Score", f"{favourability:.1f}%")
        else:
            st.metric("Favourability Score", "N/A")

    with row1_2:
        if has_dates:
            try:
                # Parse dates on a copy so the table and the downloaded report
                # keep the original values even when some cannot be parsed.
                timeline_df = res_df.assign(
                    Date=pd.to_datetime(res_df['Date'], errors='coerce')
                ).dropna(subset=['Date'])

                if timeline_df.empty:
                    st.warning("Could not create timeline chart (Date format issue).")
                else:
                    time_df = timeline_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().reset_index(name='Count')

                    fig_line = px.line(time_df, x='Date', y='Count', color='Sentiment',
                                       title="Sentiment Trends Over Time",
                                       color_discrete_map=color_map, markers=True)
                    st.plotly_chart(fig_line, use_container_width=True)
            except Exception:
                # The timeline is best-effort; the rest of the report still renders.
                st.warning("Could not create timeline chart (Date format issue).")

    # Data Table & Download
    st.dataframe(res_df)
    csv = res_df.to_csv(index=False).encode('utf-8')
    st.download_button("📥 Download Analysis Report", csv, "political_sentiment_report.csv", "text/csv")


if uploaded_file:
    # Parse the upload on its own so that only genuine read failures are
    # reported as CSV errors; later problems get their own message below.
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        df = None
        st.error(f"Error reading CSV: {e}")

    if df is not None:
        try:
            st.success(f"Loaded {len(df)} comments successfully!")

            # Data Cleanup & Preview
            st.dataframe(df.head(3))

            # Column Auto-Detection: first column whose name mentions
            # "comment" / "date"; the comment column falls back to the first one.
            cols = df.columns.tolist()
            comment_col = next((c for c in cols if 'comment' in str(c).lower()), cols[0])
            date_col = next((c for c in cols if 'date' in str(c).lower()), None)

            col_sel1, col_sel2 = st.columns(2)
            with col_sel1:
                comment_col = st.selectbox("Select Comment Column", cols, index=cols.index(comment_col))
            with col_sel2:
                if date_col:
                    date_col = st.selectbox("Select Date Column (Optional)", cols, index=cols.index(date_col))
                else:
                    st.write("No Date column detected.")

            # 3. RUN ANALYSIS
            if st.button("🚀 Start AI Analysis", type="primary"):
                results = _analyze_comments(df, comment_col, date_col)

                # 4. VISUALIZATION
                if results:
                    _render_results(pd.DataFrame(results), has_dates=bool(date_col))
                else:
                    # Nothing survived filtering; charts would fail on an empty frame.
                    st.warning("No analyzable comments were found in the selected column.")
        except Exception as e:
            st.error(f"Analysis failed: {e}")