# Source: Hugging Face Space (status when captured: Sleeping)
import json
import logging
import re

import pandas as pd
import plotly.express as px
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# --- CONFIG & SETUP ---
# Browser-tab title/icon and wide layout for the whole app.
st.set_page_config(
    page_title="BD Political Sentinel AI",
    page_icon="🇧🇩",
    layout="wide",
)
# --- ADVANCED KEYWORD DATABASE (Tuned for your CSV Data) ---
# Maps each selectable affiliation to two comma-separated keyword strings:
#   "keywords"       - interpolated into the prompt as markers of support
#   "rival_keywords" - interpolated into the prompt as markers of attack
# The strings are passed to the model verbatim; they are never split or
# matched in Python code.
POLITICAL_CONTEXT = {
    "BNP": {
        "keywords": "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy, BNP, 71 chetona",
        "rival_keywords": "নৌকা, ভোট চোর, হাসিনা, লীগ, চাঁদাবাজ, চান্দা, দুর্নীতি, terrorist, arson",
    },
    "Awami League": {
        "keywords": "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat, development, 71 er chetona",
        "rival_keywords": "ধানের শীষ, চোর, বিএনপি, জামায়াত, rajakar, killer, dictator, fascist",
    },
    "Jamaat-e-Islami": {
        "keywords": "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul, Jammat, Shibir, Islamic",
        "rival_keywords": "নাস্তিক, লীগ, শাহবাগ, rajakar, war criminal, terrorist, jongi",
    },
    "General/Interim Govt": {
        "keywords": "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ, Yunus, Student Power",
        "rival_keywords": "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা, anarchy, instability",
    },
}
# --- MODEL LOADER ---
@st.cache_resource(show_spinner=False)
def _build_pipeline(model_id):
    """Download/load the model and wrap it in a text-generation pipeline.

    Cached with ``st.cache_resource`` so the weights are loaded once per
    server process instead of on every Streamlit rerun (each widget
    interaction re-executes the whole script). Exceptions propagate to the
    caller; only a successfully built pipeline is stored in the cache.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Auto-detect device: use float32 for CPU stability, float16 for GPU speed
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=dtype,
        device_map="auto",
    )
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.2,  # Low temp = Logic focused
        top_p=0.9,
    )


def load_model(model_id="hishab/titulm-llama-3.2-3b-v2.0"):
    """Return the text-generation pipeline, or ``None`` if loading failed.

    Args:
        model_id: Hugging Face model identifier. Defaults to the
            Llama-3.2-3B based model, which fits on Free Tier (CPU) or GPU.

    Returns:
        The ``transformers`` pipeline on success, ``None`` on any failure.
        The failure is written to the log with its traceback so that the
        "Check HuggingFace Logs" hint shown in the UI is actionable.
    """
    try:
        return _build_pipeline(model_id)
    except Exception:
        # Keep the "None on failure" contract, but do not lose the cause.
        logging.getLogger(__name__).exception("Failed to load model %s", model_id)
        return None
# Sidebar Status
# Shows the detected device, loads the model behind a spinner, and halts the
# script when the model could not be loaded (nothing below can work then).
with st.sidebar:
    st.title("⚙️ System Status")
    if torch.cuda.is_available():
        st.success("🟢 GPU Active (Fast Mode)")
    else:
        st.warning("🟠 CPU Mode (Standard Speed)")
    with st.spinner("Initializing AI Engine..."):
        llm = load_model()
    # load_model() signals failure with None; test for that explicitly
    # rather than relying on the truthiness of the pipeline object.
    if llm is None:
        st.error("❌ Model Failed to Load. Check HuggingFace Logs.")
        st.stop()
    else:
        st.success("✅ AI Brain Ready")
# --- HELPER FUNCTIONS ---
def clean_json_output(text):
    """Robustly extract JSON from the LLM's chatter.

    Scans ``text`` for JSON objects and returns the last one that decodes
    successfully. A real JSON decoder is used instead of a brace-matching
    regex, so nested objects and ``}`` characters inside string values are
    handled, and a trailing non-JSON ``{...}`` fragment does not hide an
    earlier valid object.

    Args:
        text: Raw model reply. Non-string input yields ``None``.

    Returns:
        The decoded ``dict``, or ``None`` when no JSON object is present.
    """
    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    found = None
    cursor = text.find("{")
    while cursor != -1:
        try:
            candidate, end = decoder.raw_decode(text, cursor)
        except ValueError:
            # Not valid JSON from this brace; try the next opening brace.
            cursor = text.find("{", cursor + 1)
            continue
        if isinstance(candidate, dict):
            found = candidate
        # Resume after the decoded object so its nested objects are not
        # mistaken for separate top-level answers.
        cursor = text.find("{", end)
    return found
def generate_comment_prompt(
    comment_text: str,
    target: str,
    party: str,
    keywords: str,
    rival_keywords: str,
) -> list:
    """Build the chat messages asking the model to classify one comment.

    Args:
        comment_text: The comment to classify.
        target: Name of the candidate/party the sentiment is measured towards.
        party: Affiliation label inserted into the classification rules.
        keywords: Comma-separated phrases the prompt presents as support.
        rival_keywords: Comma-separated phrases the prompt presents as attacks.

    Returns:
        A two-element list of ``{"role", "content"}`` dicts: the system
        message carrying the rules, examples and required JSON reply format,
        followed by the user message carrying the comment.
    """
    # Doubled braces in the last line render as literal { } in the prompt.
    system_prompt = f"""You are an Expert Bangla Sentiment Analyzer.
Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
CRITICAL RULES:
1. Support for {party} or '{keywords}' = POSITIVE.
2. Attacks on {party}, calling them '{rival_keywords}' = NEGATIVE.
3. Support for RIVAL parties = NEGATIVE.
4. Mixed: "Hate X, Love {party}" = POSITIVE. "Love X, Hate {party}" = NEGATIVE.
Examples:
- Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: BNP) -> POSITIVE (Loves BNP)
- Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: Jamaat) -> NEGATIVE (Hates Jamaat)
- Input: "Chadabaz BNP" (Target: BNP) -> NEGATIVE
Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short explanation"}}
"""
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Comment: {comment_text}"},
    ]
# --- MAIN UI ---
st.title("🇧🇩 Smart Political Sentiment Analyzer")
st.markdown("Context-Aware Analysis for Bangla & Banglish Comments")

# 1. SETUP CONTEXT
# The free-text target name and the selected affiliation are both inserted
# into the prompt; the affiliation also selects the keyword lists.
st.subheader("1. Analysis Configuration")
col1, col2 = st.columns(2)
with col1:
    target_entity = st.text_input("Target Candidate/Party Name", "BNP")
with col2:
    party_context = st.selectbox("Political Affiliation (Logic Mapping)", list(POLITICAL_CONTEXT.keys()))
selected_keywords = POLITICAL_CONTEXT[party_context]["keywords"]
selected_rivals = POLITICAL_CONTEXT[party_context]["rival_keywords"]
st.info(f"**AI Logic:** Detecting Support for *{target_entity}* using keywords: [{selected_keywords}] and flagging attacks like: [{selected_rivals}]")

# 2. UPLOAD DATA
st.subheader("2. Upload Data")
uploaded_file = st.file_uploader("Upload CSV File (Must have 'Comment' column)", type=["csv"])
# Labels the charts and the favourability score know how to handle.
_KNOWN_LABELS = ("POSITIVE", "NEGATIVE", "NEUTRAL")
_COLOR_MAP = {"POSITIVE": "#00CC96", "NEGATIVE": "#EF553B", "NEUTRAL": "#636EFA", "ERROR": "grey"}
# Session-state slot that keeps the last results alive across reruns.
_RESULTS_KEY = "sentiment_analysis_results"


def _label_and_reason(raw_reply):
    """Turn the model's raw reply into a ``(label, reasoning)`` pair.

    The label is upper-cased and stripped so variants such as "Positive"
    still match the colour map and the counts. Replies without a JSON
    object, or with a label outside ``_KNOWN_LABELS``, become "ERROR" and
    carry the raw reply as the reasoning so they can be inspected.
    """
    data = clean_json_output(raw_reply)
    if not data:
        return "ERROR", raw_reply
    label = str(data.get("label", "NEUTRAL")).strip().upper()
    if label not in _KNOWN_LABELS:
        return "ERROR", raw_reply
    return label, data.get("reasoning", "Parse Error")


def _favourability(pos_count, neg_count):
    """Percentage of POSITIVE among POSITIVE + NEGATIVE; 0.0 when neither exists."""
    decided = pos_count + neg_count
    return (pos_count / decided) * 100 if decided else 0.0


def _analyse_comments(df, comment_col, date_col):
    """Classify every usable comment in ``df`` and return the results frame.

    Returns a DataFrame with the columns Date, Comment, Sentiment and
    Reasoning. The columns are present even when no row was usable.
    """
    results = []
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(df)
    # Count rows by position: the index label is not guaranteed to be 0..n-1,
    # and the bar must also advance for rows that are skipped.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        text = str(row[comment_col])
        # Basic filtering: drop empty/one-character cells and missing values.
        if len(text) >= 2 and text.lower() != "nan":
            prompt = generate_comment_prompt(text, target_entity, party_context, selected_keywords, selected_rivals)
            try:
                out = llm(prompt)
                # Chat-style pipeline output: the last message is the reply.
                label, reason = _label_and_reason(out[0]['generated_text'][-1]['content'])
            except Exception as e:
                # One failing comment must not abort the whole batch.
                label, reason = "ERROR", str(e)
            results.append({
                "Date": row[date_col] if date_col else None,
                "Comment": text,
                "Sentiment": label,
                "Reasoning": reason,
            })
            status_text.text(f"Processing {position}/{total}: {label}")
        progress_bar.progress(position / total)
    return pd.DataFrame(results, columns=["Date", "Comment", "Sentiment", "Reasoning"])


def _render_results(res_df, date_col):
    """Draw the pie chart, score, optional timeline, table and download button."""
    st.divider()
    st.header("📊 Analysis Results")
    # Layout: Pie Chart + Time Series
    row1_1, row1_2 = st.columns([1, 2])
    with row1_1:
        fig_pie = px.pie(res_df, names="Sentiment", title="Overall Sentiment", color="Sentiment", color_discrete_map=_COLOR_MAP)
        st.plotly_chart(fig_pie, use_container_width=True)
        # Sentiment Score Calculation
        pos_count = int((res_df['Sentiment'] == 'POSITIVE').sum())
        neg_count = int((res_df['Sentiment'] == 'NEGATIVE').sum())
        st.metric("Favourability Score", f"{_favourability(pos_count, neg_count):.1f}%")
    with row1_2:
        if date_col:
            try:
                # Convert Date and Aggregate (unparseable dates become NaT)
                res_df['Date'] = pd.to_datetime(res_df['Date'], errors='coerce')
                time_df = res_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().reset_index(name='Count')
                fig_line = px.line(time_df, x='Date', y='Count', color='Sentiment',
                                   title="Sentiment Trends Over Time",
                                   color_discrete_map=_COLOR_MAP, markers=True)
                st.plotly_chart(fig_line, use_container_width=True)
            except Exception:
                st.warning("Could not create timeline chart (Date format issue).")
    # Data Table & Download
    st.dataframe(res_df)
    csv = res_df.to_csv(index=False).encode('utf-8')
    st.download_button("📥 Download Analysis Report", csv, "political_sentiment_report.csv", "text/csv")


if uploaded_file:
    df = None
    try:
        # Rewind first: the same buffer may already have been consumed.
        uploaded_file.seek(0)
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        # Only genuine read/parse failures are reported as CSV errors.
        st.error(f"Error reading CSV: {e}")

    if df is not None:
        try:
            st.success(f"Loaded {len(df)} comments successfully!")
            # Data Cleanup & Preview
            st.dataframe(df.head(3))
            # Column Auto-Detection
            cols = df.columns.tolist()
            comment_col = next((c for c in cols if 'comment' in c.lower()), cols[0])
            date_col = next((c for c in cols if 'date' in c.lower()), None)
            col_sel1, col_sel2 = st.columns(2)
            with col_sel1:
                comment_col = st.selectbox("Select Comment Column", cols, index=cols.index(comment_col))
            with col_sel2:
                if date_col:
                    date_col = st.selectbox("Select Date Column (Optional)", cols, index=cols.index(date_col))
                else:
                    st.write("No Date column detected.")

            # Identifies the upload so stale results are not shown for a new file.
            file_key = (uploaded_file.name, uploaded_file.size)

            # 3. RUN ANALYSIS
            if st.button("🚀 Start AI Analysis", type="primary"):
                st.session_state[_RESULTS_KEY] = {
                    "file": file_key,
                    "results": _analyse_comments(df, comment_col, date_col),
                    "date_col": date_col,
                }

            # 4. VISUALIZATION
            # Rendered from session state: clicking the download button reruns
            # the script with the start button unset, which would otherwise
            # discard the results and force the analysis to be repeated.
            if _RESULTS_KEY in st.session_state and st.session_state[_RESULTS_KEY]["file"] == file_key:
                saved = st.session_state[_RESULTS_KEY]
                if saved["results"].empty:
                    st.warning("No analysable comments were found in the selected column.")
                else:
                    _render_results(saved["results"], saved["date_col"])
        except Exception as e:
            # Best-effort like the original, but with an accurate message.
            st.error(f"Analysis failed: {e}")