Spaces:

Arjon07CSE
/

spf_sentiment

Sleeping

App Files Files Community

Arjon07CSE committed 27 days ago

Commit

8367aa6

verified ·

1 Parent(s): 93ad989

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +280 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,282 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import pandas as pd
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import json
+import plotly.express as px
+import re
+# --- CONFIG & SETUP ---
+# Page-level Streamlit settings: browser tab title, icon, and wide layout.
+st.set_page_config(
+    page_title="BD Political Sentinel AI",
+    page_icon="🇧🇩",
+    layout="wide"
+)
+# --- KEYWORD DATABASE (To make the AI Smarter) ---
+# This dictionary helps the AI explicitly understand symbols associated with parties.
+# Structure: party name -> {"keywords": ..., "rival_keywords": ...}.
+# - The party names (top-level keys) are the options offered in the
+#   "Political Affiliation" select box of the comments tab.
+# - Both values are single comma-separated strings (not lists); they are
+#   interpolated verbatim into the comment-analysis system prompt and echoed
+#   in the "AI Context Memory" caption.
+# - "keywords" are treated by the prompt as signals of support for the party,
+#   "rival_keywords" as signals of support for its opponents.
+POLITICAL_CONTEXT = {
+    "BNP": {
+        "keywords": "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy",
+        "rival_keywords": "নৌকা, ভোট চোর, হাসিনা, লীগ"
+    },
+    "Awami League": {
+        "keywords": "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat",
+        "rival_keywords": "ধানের শীষ, চোর, বিএনপি, জামায়াত"
+    },
+    "Jamaat-e-Islami": {
+        "keywords": "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul",
+        "rival_keywords": "নাস্তিক, লীগ, শাহবাগ"
+    },
+    "General/Interim Govt": {
+        "keywords": "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ",
+        "rival_keywords": "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা"
+    }
+}
+# --- MODEL LOADER ---
+@st.cache_resource
+def load_model():
+    """Build the text-generation pipeline used for all analyses.
+    Loads the tokenizer and causal LM for ``model_id`` and wraps them in a
+    ``transformers`` text-generation pipeline with a short output budget and
+    low-temperature sampling.
+    Returns:
+        The pipeline object, or ``None`` if any step of loading failed. The
+        failure is logged with its traceback instead of being discarded.
+    """
+    # Local import keeps this block self-contained (no file-level import change).
+    import logging
+    model_id = "hishab/titulm-llama-3.2-3b-v2.0"
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        # float16 when a CUDA device is available, float32 otherwise (CPU).
+        dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            torch_dtype=dtype,
+            device_map="auto"
+        )
+        pipe = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=150, # Keep it short for JSON
+            do_sample=True,
+            temperature=0.2, # Lower temperature = More strict/logical
+            top_p=0.9
+        )
+    except Exception:
+        # Top-level boundary: callers only test for a falsy result, so keep
+        # returning None, but record why loading failed so it can be diagnosed.
+        # NOTE(review): the function is wrapped in st.cache_resource, so this
+        # None is presumably cached too -- confirm whether a retry path is needed.
+        logging.getLogger(__name__).exception("Failed to load model %s", model_id)
+        return None
+    return pipe
+# Load Model
+# Everything in this block renders inside the sidebar: a logo, a title, a
+# hardware notice, and a spinner shown while the model is being loaded.
+with st.sidebar:
+    st.image("https://cdn-icons-png.flaticon.com/512/6656/6656046.png", width=50)
+    st.title("AI Settings")
+    # Tell the user which device torch reports, since it drives inference speed.
+    if torch.cuda.is_available():
+        st.success("🚀 GPU Detected! Inference will be fast.")
+    else:
+        st.warning("⚠️ Running on CPU. Inference might be slow.")
+    with st.spinner("Waking up the Neural Network..."):
+        # `llm` is the module-level pipeline used by both tabs below.
+        llm = load_model()
+    # load_model() returns None on failure; show an error and end this script
+    # run so the tabs below are not built without a model.
+    if not llm:
+        st.error("Model failed to load.")
+        st.stop()
+# --- HELPER FUNCTIONS ---
+def clean_json_output(text):
+    """Extract the last valid JSON object embedded in the LLM's output.
+    The text is scanned left to right; at every ``{`` a real JSON decode is
+    attempted, so nested objects and string values containing braces are
+    handled correctly. Malformed brace groups are skipped rather than
+    discarding earlier valid objects.
+    Args:
+        text: Raw model output, possibly with reasoning or chatter around the JSON.
+    Returns:
+        The last top-level JSON object found, as a dict, or ``None`` when the
+        input is not a string or contains no decodable object.
+    """
+    if not isinstance(text, str):
+        return None
+    decoder = json.JSONDecoder()
+    last_obj = None
+    start = text.find("{")
+    while start != -1:
+        try:
+            candidate, end = decoder.raw_decode(text, start)
+        except ValueError:
+            # Not valid JSON from this brace; try the next opening brace.
+            start = text.find("{", start + 1)
+            continue
+        # The last object wins: the answer usually follows any reasoning text.
+        last_obj = candidate
+        # Resume after the decoded object so nested objects are not re-reported.
+        start = text.find("{", end)
+    return last_obj
+# --- PROMPT GENERATORS ---
+def generate_news_prompt(news_text, target):
+    """Build the chat messages asking whether a news item helps or hurts ``target``.
+    Args:
+        news_text: The headline or news text to classify.
+        target: Name of the person the news is judged against.
+    Returns:
+        A two-element list of chat messages: the system instructions (with
+        ``target`` filled in) followed by the user message carrying the news.
+    """
+    system_content = f"""You are a Political Analyst for Bangladesh.
+        Task: Analyze if the news is FAVOURABLE or UNFAVORABLE for: {target}.
+        DEFINITIONS:
+        - FAVOURABLE: Positive news, legal wins, return to power, praise.
+        - UNFAVORABLE: Negative news, arrest, criticism, loss.
+        - NEUTRAL: Factual news with no clear bias.
+        Response Format: JSON only -> {{"label": "FAVOURABLE"|"UNFAVORABLE"|"NEUTRAL", "reasoning": "Bangla sentence"}}
+        """
+    user_content = f"News: {news_text}"
+    messages = []
+    messages.append({"role": "system", "content": system_content})
+    messages.append({"role": "user", "content": user_content})
+    return messages
+def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
+    """Build the chat messages for judging a comment's sentiment towards ``target``.
+    Args:
+        comment_text: The comment to classify.
+        target: Person the sentiment is measured against.
+        party: Party the target is affiliated with.
+        keywords: Text listing the party's own symbols and slogans.
+        rival_keywords: Text listing symbols and slogans of the party's rivals.
+    Returns:
+        A two-element list of chat messages: the system rules (with all context
+        values filled in) followed by the user message carrying the comment.
+    """
+    system_message = dict(role="system", content=f"""You are an Expert Bangla Sentiment Analyzer.
+        Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
+        RULES:
+        1. If comment mentions {party} symbols ({keywords}) or praises {target} -> POSITIVE.
+        2. If comment supports {party}'s rivals ({rival_keywords}) or attacks {target} -> NEGATIVE.
+        3. If comment is sarcastic (mocking praise) -> NEGATIVE.
+        Examples:
+        - Comment: "Zindabad!" (Context: {party}) -> POSITIVE
+        - Comment: "Chor!" (Context: {party}) -> NEGATIVE
+        Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short Bangla explanation"}}
+        """)
+    user_message = dict(role="user", content=f"Comment: {comment_text}")
+    return [system_message, user_message]
+# --- MAIN UI ---
+st.title("🇧🇩 Smart Political Sentiment Analyzer")
+st.markdown("Context-Aware Analysis for Bangladesh Politics")
+# Tabs for the two sections
+tab_news, tab_comments = st.tabs(["📰 Political News Analysis", "📣 Public Sentiment (Comments)"])
+# =======================
+# SECTION 1: NEWS
+# =======================
+# Classifies news as FAVOURABLE / UNFAVORABLE / NEUTRAL for one candidate,
+# either for a single pasted headline or for every row of an uploaded CSV.
+with tab_news:
+    st.header("Is this news Good or Bad for the Candidate?")
+    col1, col2 = st.columns(2)
+    with col1:
+        target_name_news = st.text_input("Candidate Name (Who is this about?)", "তারেক রহমান")
+    with col2:
+        news_input_method = st.radio("Input Method", ["Paste Text", "Upload CSV"])
+    if news_input_method == "Paste Text":
+        news_text = st.text_area("Paste News Headline:", height=100)
+        if st.button("Analyze News Impact", type="primary"):
+            if not news_text or not news_text.strip():
+                # Give feedback instead of silently doing nothing on empty input.
+                st.warning("Please paste a news headline first.")
+            else:
+                with st.spinner("Analyzing impact..."):
+                    prompt = generate_news_prompt(news_text, target_name_news)
+                    res = llm(prompt)
+                    # The pipeline returns the whole chat; the last message is the reply.
+                    output_text = res[0]['generated_text'][-1]['content']
+                    data = clean_json_output(output_text)
+                    if data:
+                        st.subheader(f"Result: {data.get('label', 'ERROR')}")
+                        st.write(f"**Reasoning:** {data.get('reasoning', '')}")
+                    else:
+                        st.error("Could not parse AI response.")
+                        st.code(output_text)
+    elif news_input_method == "Upload CSV":
+        uploaded_news = st.file_uploader("Upload News CSV", type=["csv"])
+        if uploaded_news:
+            df_news = pd.read_csv(uploaded_news)
+            text_col = st.selectbox("Select Headline Column", df_news.columns)
+            if st.button("Analyze Batch News"):
+                results = []
+                prog_bar = st.progress(0)
+                total = len(df_news)
+                # Count rows by position: the DataFrame index label is not
+                # guaranteed to be 0..n-1, which would push progress out of range.
+                for pos, (_, row) in enumerate(df_news.iterrows(), start=1):
+                    headline = row[text_col]
+                    # Missing cells would otherwise reach the model as the text "nan".
+                    if not pd.isna(headline) and str(headline).strip():
+                        try:
+                            prompt = generate_news_prompt(str(headline), target_name_news)
+                            res = llm(prompt)
+                            data = clean_json_output(res[0]['generated_text'][-1]['content'])
+                            # .get(): valid JSON may still lack the expected keys.
+                            impact = data.get("label", "ERROR") if data else "ERROR"
+                            reasoning = data.get("reasoning", "") if data else ""
+                        except Exception as e:
+                            # One failing row must not abort the whole batch
+                            # (same policy as the comments tab).
+                            impact = "ERROR"
+                            reasoning = str(e)
+                        results.append({
+                            "News": headline,
+                            "Impact": impact,
+                            "Reasoning": reasoning
+                        })
+                    prog_bar.progress(pos / total)
+                if results:
+                    res_df = pd.DataFrame(results)
+                    st.dataframe(res_df)
+                    # Chart
+                    fig = px.pie(res_df, names="Impact", title=f"Media Sentiment for {target_name_news}")
+                    st.plotly_chart(fig)
+                else:
+                    # An empty frame has no "Impact" column for the pie chart.
+                    st.warning("No headlines found to analyze in the selected column.")
+# =======================
+# SECTION 2: COMMENTS
+# =======================
+# Labels every comment of an uploaded CSV as POSITIVE / NEGATIVE / NEUTRAL
+# towards a target person, using the selected party's keyword context.
+with tab_comments:
+    st.header("Context-Aware Comment Labeling")
+    st.info("The AI uses the 'Target Party' to understand slogans like 'Dhaner Sheesh' or 'Nouka'.")
+    # 1. ESTABLISH CONTEXT
+    c1, c2 = st.columns(2)
+    with c1:
+        target_entity_cmt = st.text_input("Target Person (e.g., Khaleda Zia)", "Khaleda Zia")
+    with c2:
+        party_context = st.selectbox("Political Affiliation (Defines Symbols)", list(POLITICAL_CONTEXT.keys()))
+    # Get keywords based on selection
+    selected_keywords = POLITICAL_CONTEXT[party_context]["keywords"]
+    selected_rivals = POLITICAL_CONTEXT[party_context]["rival_keywords"]
+    st.caption(f"**AI Context Memory:** Positive Keywords = [{selected_keywords}] | Negative Keywords = [{selected_rivals}]")
+    # 2. INPUT
+    uploaded_comments = st.file_uploader("Upload Comments CSV", type=["csv"], key="cmt_up")
+    if uploaded_comments:
+        df_cmt = pd.read_csv(uploaded_comments)
+        st.write("Preview:", df_cmt.head(3))
+        comment_col = st.selectbox("Which column contains the comments?", df_cmt.columns)
+        if st.button("Start Intelligent Labeling", type="primary"):
+            final_data = []
+            bar = st.progress(0)
+            total = len(df_cmt)
+            # Count rows by position so progress stays within 0..1 whatever
+            # the DataFrame index labels are, and advances on skipped rows too.
+            for pos, (_, row) in enumerate(df_cmt.iterrows(), start=1):
+                raw_comment = row[comment_col]
+                txt = str(raw_comment)
+                # Skip empty or very short comments. Missing cells are checked
+                # explicitly: str(NaN) is "nan", which is 3 characters long and
+                # would otherwise slip past the length filter.
+                if not pd.isna(raw_comment) and len(txt.strip()) >= 3:
+                    prompt = generate_comment_prompt(txt, target_entity_cmt, party_context, selected_keywords, selected_rivals)
+                    try:
+                        out = llm(prompt)
+                        # The pipeline returns the whole chat; the last message is the reply.
+                        raw_str = out[0]['generated_text'][-1]['content']
+                        json_dat = clean_json_output(raw_str)
+                        label = json_dat.get("label", "NEUTRAL") if json_dat else "ERROR"
+                        # On a parse failure keep the raw reply so it can be inspected.
+                        reason = json_dat.get("reasoning", "Parse Fail") if json_dat else raw_str
+                    except Exception as e:
+                        # One failing comment must not abort the whole batch.
+                        label = "ERROR"
+                        reason = str(e)
+                    final_data.append({
+                        "Original Comment": txt,
+                        "Sentiment": label,
+                        "Why?": reason
+                    })
+                bar.progress(pos / total)
+            if not final_data:
+                # An empty frame has no "Sentiment" column for the pie chart.
+                st.warning("No comments long enough to analyze were found in the selected column.")
+            else:
+                # RESULTS
+                res_df_cmt = pd.DataFrame(final_data)
+                st.success("Analysis Complete!")
+                # Visualization
+                row1, row2 = st.columns([2, 1])
+                with row1:
+                    st.dataframe(res_df_cmt)
+                with row2:
+                    # Custom colors for politics
+                    color_map = {
+                        "POSITIVE": "#00CC96", # Green
+                        "NEGATIVE": "#EF553B", # Red
+                        "NEUTRAL": "#636EFA",  # Blue
+                        "ERROR": "#000000"
+                    }
+                    fig = px.pie(res_df_cmt, names="Sentiment", title="Public Sentiment", color="Sentiment", color_discrete_map=color_map)
+                    st.plotly_chart(fig)
+                # Download
+                csv_dl = res_df_cmt.to_csv(index=False).encode('utf-8')
+                st.download_button("Download Labeled Data", csv_dl, "analyzed_comments.csv", "text/csv")