Spaces:

rocky250
/

Sentiment-Analysis

Sleeping

App Files Files Community

rocky250 committed on Jan 27

Commit

b03b414

verified ·

1 Parent(s): 1f375b8

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +148 -77

src/streamlit_app.py CHANGED Viewed

@@ -2,10 +2,10 @@ import streamlit as st
 import torch
 import torch.nn.functional as F
 import numpy as np
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
 from normalizer import normalize
 import torch.nn as nn
-from huggingface_hub import hf_hub_download
 st.set_page_config(page_title="Political Sentiment", layout="wide")
@@ -14,34 +14,37 @@ class BanglaPoliticalNet(nn.Module):
         super().__init__()
         self.banglabert = AutoModel.from_pretrained("csebuetnlp/banglabert")
         self.hidden_size = self.banglabert.config.hidden_size
         self.cnn_layers = nn.ModuleList([
-            nn.Conv1d(self.hidden_size, 128, kernel_size=k, padding=k//2)
             for k in [3,5,7]
         ])
         self.attention = nn.MultiheadAttention(self.hidden_size, 8, batch_first=True)
         self.classifier = nn.Sequential(
-            nn.LayerNorm(self.hidden_size),
-            nn.Dropout(0.4),
-            nn.Linear(self.hidden_size, 512),
-            nn.GELU(),
             nn.Dropout(0.3),
-            nn.Linear(512, 256),
-            nn.GELU(),
-            nn.Linear(256, num_classes)
         )
-        self.explainability_weights = nn.Parameter(torch.ones(num_classes) * 0.1)
     def forward(self, input_ids, attention_mask=None):
         bert_out = self.banglabert(input_ids, attention_mask=attention_mask).last_hidden_state
-        cnn_outs = [F.relu(cnn(bert_out.transpose(1,2)).transpose(1,2)) for cnn in self.cnn_layers]
-        cnn_concat = torch.cat(cnn_outs, dim=-1)
-        if not hasattr(self, 'cnn_proj'):
-            self.cnn_proj = nn.Linear(384, self.hidden_size).to(input_ids.device)
-        attn_input = self.cnn_proj(cnn_concat)
         attn_out, _ = self.attention(attn_input, attn_input, attn_input)
-        pooled = attn_out[:, 0, :]
-        logits = self.classifier(pooled)
-        return logits, self.explainability_weights
 st.markdown("""
 <style>
@@ -209,35 +212,34 @@ h1, h2, h3 {
 """, unsafe_allow_html=True)
 id2label = {0: 'Very Negative', 1: 'Negative', 2: 'Neutral', 3: 'Positive', 4: 'Very Positive'}
-label_colors = { 'Very Negative': '#ef4444', 'Negative': '#f97316', 'Neutral': '#64748b', 'Positive': '#22c55e', 'Very Positive': '#16a34a' }
 @st.cache_resource
 def load_models():
     models_loaded = {}
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    standard_models = {
         "model_banglabert": "rocky250/Sentiment-banglabert",
         "model_mbert": "rocky250/Sentiment-mbert",
         "model_bbase": "rocky250/Sentiment-bbase",
-        "model_xlmr": "rocky250/Sentiment-xlmr"
     }
-    for name, repo in standard_models.items():
         try:
             tokenizer = AutoTokenizer.from_pretrained(repo)
             model = AutoModelForSequenceClassification.from_pretrained(repo)
-            models_loaded[name] = (tokenizer, model.to(device))
         except:
             continue
-    try:
-        model_path = hf_hub_download(repo_id="rocky250/bangla-political", filename="pytorch_model.bin")
-        tokenizer = AutoTokenizer.from_pretrained("rocky250/bangla-political")
-        model = BanglaPoliticalNet(num_classes=5)
-        if not hasattr(model, 'cnn_proj'):
-             model.cnn_proj = nn.Linear(384, model.hidden_size)
-        model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
-        models_loaded["bangla_political"] = (tokenizer, model.to(device))
-    except:
-        pass
     return models_loaded
 models_dict = load_models()
@@ -245,23 +247,25 @@ models_dict = load_models()
 def predict_single_model(text, model_name):
     clean_text = normalize(text)
     tokenizer, model = models_dict[model_name]
     device = next(model.parameters()).device
     inputs = tokenizer(clean_text, return_tensors="pt", truncation=True, padding=True, max_length=128).to(device)
     with torch.no_grad():
-        if isinstance(model, BanglaPoliticalNet):
-             logits, _ = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
-        else:
-            outputs = model(**inputs)
-            logits = outputs.logits
     probs = F.softmax(logits, dim=1).cpu().numpy()[0]
     pred_id = np.argmax(probs)
     prediction = id2label[pred_id]
     return prediction, probs
 def predict_ensemble(text):
     clean_text = normalize(text)
     all_probs = []
     all_predictions = []
     for name in models_dict.keys():
         try:
             pred, probs = predict_single_model(clean_text, name)
@@ -269,6 +273,7 @@ def predict_ensemble(text):
             all_predictions.append(pred)
         except:
             continue
     if all_probs:
         avg_probs = np.mean(all_probs, axis=0)
         final_pred = id2label[np.argmax(avg_probs)]
@@ -276,64 +281,130 @@ def predict_ensemble(text):
     return "Error", [], np.zeros(5)
 st.markdown("""
-<div style='text-align: center; background: rgba(255,255,255,0.1); padding: 30px; border-radius: 20px; margin-bottom: 30px; backdrop-filter: blur(20px);'>
     <h1 style='font-size: 3.5rem; margin: 0; background: linear-gradient(45deg, #ffffff, #e2e8f0); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800;'>Political Sentiment Analysis</h1>
 </div>
 """, unsafe_allow_html=True)
 col1, col2 = st.columns([3, 1])
 with col1:
-    user_input = st.text_area("Enter Bengali political text:", height=140, placeholder="এই বক্সে বাংলা রাজনৈতিক মন্তব্য লিখুন...")
 with col2:
     st.markdown("<div style='height: 20px'></div>", unsafe_allow_html=True)
-    mode = st.radio("Analysis Mode:", ["Single Model", "Ensemble"], horizontal=True)
     selected_model = None
     if mode == "Single Model":
         model_options = {name: name for name in models_dict.keys()}
-        if model_options:
-            selected_model = st.selectbox("Select Model:", list(model_options.keys()), index=0)
 analyze_btn = st.button("ANALYZE SENTIMENT", type="primary", use_container_width=True)
 if analyze_btn and user_input.strip():
-    with st.spinner('Processing...'):
         if mode == "Single Model":
-            if selected_model:
-                final_res, probs = predict_single_model(user_input, selected_model)
-                c1, c2 = st.columns([1, 2])
-                with c1:
-                    st.markdown(f'<div class="main-card" style="border-top: 8px solid {label_colors[final_res]}"><div class="result-title">{selected_model}</div><div class="result-value" style="color: {label_colors[final_res]}">{final_res}</div><div style="font-size: 18px; color: #64748b; margin-top: 15px;">Confidence: {max(probs)*100:.1f}%</div></div>', unsafe_allow_html=True)
-                with c2:
-                    st.markdown('<div class="section-header">Confidence Scores</div>', unsafe_allow_html=True)
-                    for i in range(5):
-                        lbl = id2label[i]
-                        p = probs[i] * 100
-                        clr = label_colors[lbl]
-                        st.markdown(f'<div class="prob-row"><div class="prob-label"><span>{lbl}</span><span style="color: {clr};">{p:.1f}%</span></div><div class="prob-bar-bg"><div class="prob-bar-fill" style="width: {min(p, 100)}%; background: {clr};"></div></div></div>', unsafe_allow_html=True)
         else:
             final_res, all_votes, avg_probs = predict_ensemble(user_input)
-            mc, dc = st.columns([1, 1.4])
-            with mc:
-                st.markdown(f'<div class="main-card" style="border-top: 8px solid {label_colors[final_res]}"><div class="result-title">ENSEMBLE CONSENSUS</div><div class="result-value" style="color: {label_colors[final_res]}">{final_res}</div></div>', unsafe_allow_html=True)
                 for i in range(5):
-                    lbl = id2label[i]
-                    p = avg_probs[i] * 100
-                    clr = label_colors[lbl]
-                    st.markdown(f'<div class="prob-row"><div class="prob-label"><span>{lbl}</span><span style="color: {clr};">{p:.1f}%</span></div><div class="prob-bar-bg"><div class="prob-bar-fill" style="width: {min(p, 100)}%; background: {clr};"></div></div></div>', unsafe_allow_html=True)
-            with dc:
                 st.markdown('<div class="section-header">Individual Model Votes</div>', unsafe_allow_html=True)
-                m_cols = st.columns(2)
                 for idx, (name, vote) in enumerate(zip(list(models_dict.keys()), all_votes)):
-                    with m_cols[idx % 2]:
-                        st.markdown(f'<div class="model-card"><div class="model-name">{name}</div><div style="color: {label_colors[vote]}; font-weight: 800; font-size: 24px;">{vote}</div></div>', unsafe_allow_html=True)
 elif analyze_btn and not user_input.strip():
     st.error("অনুগ্রহ করে কিছু টেক্সট লিখুন!")
-with st.expander("Example Political Texts"):
-    examples = ["সরকারের এই নীতি দেশকে ধ্বংসের দিকে নিয়ে যাবে!", "চমৎকার সিদ্ধান্ত! দেশের জন্য গর্বিত।", "রাজনীতির কোনো পরিবর্তন হবে না"]
-    ex_cols = st.columns(3)
-    for idx, ex in enumerate(examples):
-        with ex_cols[idx]:
-            if st.button(ex[:30] + "...", use_container_width=True):
-                st.session_state.user_input = ex
                 st.rerun()

 import torch
 import torch.nn.functional as F
 import numpy as np
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from normalizer import normalize
 import torch.nn as nn
+from transformers import AutoModel
 st.set_page_config(page_title="Political Sentiment", layout="wide")
         super().__init__()
         self.banglabert = AutoModel.from_pretrained("csebuetnlp/banglabert")
         self.hidden_size = self.banglabert.config.hidden_size
         self.cnn_layers = nn.ModuleList([
+            nn.Conv1d(self.hidden_size, 128, kernel_size=k, padding=k//2)
             for k in [3,5,7]
         ])
         self.attention = nn.MultiheadAttention(self.hidden_size, 8, batch_first=True)
         self.classifier = nn.Sequential(
             nn.Dropout(0.3),
+            nn.Linear(self.hidden_size, 512),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(512, num_classes)
         )
     def forward(self, input_ids, attention_mask=None):
         bert_out = self.banglabert(input_ids, attention_mask=attention_mask).last_hidden_state
+        cnn_features = []
+        for cnn in self.cnn_layers:
+            cnn_out = cnn(bert_out.transpose(1,2)).transpose(1,2)
+            cnn_features.append(F.relu(cnn_out))
+        cnn_concat = torch.cat(cnn_features, dim=-1)
+        proj = nn.Linear(384, self.hidden_size).to(input_ids.device)
+        attn_input = proj(cnn_concat)
         attn_out, _ = self.attention(attn_input, attn_input, attn_input)
+        attn_pooled = attn_out[:, 0, :]
+        logits = self.classifier(attn_pooled)
+        return logits
 st.markdown("""
 <style>
 """, unsafe_allow_html=True)
 id2label = {0: 'Very Negative', 1: 'Negative', 2: 'Neutral', 3: 'Positive', 4: 'Very Positive'}
+label_colors = {
+    'Very Negative': '#ef4444',
+    'Negative': '#f97316',
+    'Neutral': '#64748b',
+    'Positive': '#22c55e',
+    'Very Positive': '#16a34a'
+}
 @st.cache_resource
 def load_models():
     models_loaded = {}
+    target_models = {
         "model_banglabert": "rocky250/Sentiment-banglabert",
         "model_mbert": "rocky250/Sentiment-mbert",
         "model_bbase": "rocky250/Sentiment-bbase",
+        "model_xlmr": "rocky250/Sentiment-xlmr",
+        "bangla_political": "rocky250/bangla-political"
     }
+    for name, repo in target_models.items():
         try:
             tokenizer = AutoTokenizer.from_pretrained(repo)
             model = AutoModelForSequenceClassification.from_pretrained(repo)
+            models_loaded[name] = (tokenizer, model.to('cuda' if torch.cuda.is_available() else 'cpu'))
         except:
             continue
     return models_loaded
 models_dict = load_models()
 def predict_single_model(text, model_name):
     clean_text = normalize(text)
     tokenizer, model = models_dict[model_name]
     device = next(model.parameters()).device
     inputs = tokenizer(clean_text, return_tensors="pt", truncation=True, padding=True, max_length=128).to(device)
     with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits
     probs = F.softmax(logits, dim=1).cpu().numpy()[0]
     pred_id = np.argmax(probs)
     prediction = id2label[pred_id]
     return prediction, probs
 def predict_ensemble(text):
     clean_text = normalize(text)
     all_probs = []
     all_predictions = []
     for name in models_dict.keys():
         try:
             pred, probs = predict_single_model(clean_text, name)
             all_predictions.append(pred)
         except:
             continue
     if all_probs:
         avg_probs = np.mean(all_probs, axis=0)
         final_pred = id2label[np.argmax(avg_probs)]
     return "Error", [], np.zeros(5)
 st.markdown("""
+<div style='
+    text-align: center;
+    background: rgba(255,255,255,0.1);
+    padding: 30px;
+    border-radius: 20px;
+    margin-bottom: 30px;
+    backdrop-filter: blur(20px);
+'>
     <h1 style='font-size: 3.5rem; margin: 0; background: linear-gradient(45deg, #ffffff, #e2e8f0); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800;'>Political Sentiment Analysis</h1>
 </div>
 """, unsafe_allow_html=True)
 col1, col2 = st.columns([3, 1])
 with col1:
+    user_input = st.text_area("Enter Bengali political text:", height=140,
+        placeholder="এই বক্সে বাংলা রাজনৈতিক মন্তব্য লিখুন...",
+        help="Type or paste Bengali political text for sentiment analysis")
 with col2:
     st.markdown("<div style='height: 20px'></div>", unsafe_allow_html=True)
+    mode = st.radio("Analysis Mode:",
+        ["Single Model", "Ensemble"],
+        horizontal=True)
     selected_model = None
     if mode == "Single Model":
         model_options = {name: name for name in models_dict.keys()}
+        selected_model = st.selectbox("Select Model:", list(model_options.keys()), index=0)
 analyze_btn = st.button("ANALYZE SENTIMENT", type="primary", use_container_width=True)
 if analyze_btn and user_input.strip():
+    with st.spinner('Processing with models...'):
         if mode == "Single Model":
+            model_name = selected_model
+            final_res, probs = predict_single_model(user_input, model_name)
+            col1, col2 = st.columns([1, 2])
+            with col1:
+                st.markdown(f"""
+                <div class="main-card" style="border-top: 8px solid {label_colors[final_res]}">
+                    <div class="result-title">{model_name}</div>
+                    <div class="result-value" style="color: {label_colors[final_res]}">{final_res}</div>
+                    <div style="font-size: 18px; color: #64748b; margin-top: 15px;">Confidence: {max(probs)*100:.1f}%</div>
+                </div>
+                """, unsafe_allow_html=True)
+            with col2:
+                st.markdown('<div class="section-header">Confidence Scores</div>', unsafe_allow_html=True)
+                for i in range(5):
+                    label = id2label[i]
+                    prob = probs[i] * 100
+                    color = label_colors[label]
+                    st.markdown(f"""
+                    <div class="prob-row">
+                        <div class="prob-label">
+                            <span style="font-weight: 700;">{label}</span>
+                            <span style="font-weight: 700; color: {color};">{prob:.1f}%</span>
+                        </div>
+                        <div class="prob-bar-bg">
+                            <div class="prob-bar-fill" style="width: {min(prob, 100)}%; background: linear-gradient(90deg, {color}, {color}cc);"></div>
+                        </div>
+                    </div>
+                    """, unsafe_allow_html=True)
         else:
             final_res, all_votes, avg_probs = predict_ensemble(user_input)
+            main_col, details_col = st.columns([1, 1.4])
+            with main_col:
+                st.markdown(f"""
+                <div class="main-card" style="border-top: 8px solid {label_colors[final_res]}; box-shadow: 0 25px 50px rgba(0,0,0,0.2);">
+                    <div class="result-title" style="font-size: 18px;">ENSEMBLE CONSENSUS</div>
+                    <div class="result-value" style="color: {label_colors[final_res]}; font-size: 60px;">{final_res}</div>
+                </div>
+                """, unsafe_allow_html=True)
+                st.markdown('<div class="section-header">Ensemble Probabilities</div>', unsafe_allow_html=True)
                 for i in range(5):
+                    label = id2label[i]
+                    prob = avg_probs[i] * 100
+                    color = label_colors[label]
+                    st.markdown(f"""
+                    <div class="prob-row">
+                        <div class="prob-label">
+                            <span>{label}</span>
+                            <span style="color: {color};">{prob:.1f}%</span>
+                        </div>
+                        <div class="prob-bar-bg">
+                            <div class="prob-bar-fill" style="width: {min(prob, 100)}%; background: linear-gradient(90deg, {color}, {color}cc);"></div>
+                        </div>
+                    </div>
+                    """, unsafe_allow_html=True)
+            with details_col:
                 st.markdown('<div class="section-header">Individual Model Votes</div>', unsafe_allow_html=True)
+                model_cols = st.columns(2)
                 for idx, (name, vote) in enumerate(zip(list(models_dict.keys()), all_votes)):
+                    with model_cols[idx % 2]:
+                        color = label_colors[vote]
+                        st.markdown(f"""
+                        <div class="model-card">
+                            <div class="model-name">{name}</div>
+                            <div style="color: {color}; font-weight: 800; font-size: 24px; margin-top: 8px;">{vote}</div>
+                        </div>
+                        """, unsafe_allow_html=True)
 elif analyze_btn and not user_input.strip():
     st.error("অনুগ্রহ করে কিছু টেক্সট লিখুন!")
+with st.expander("Example Political Texts", expanded=False):
+    examples = [
+        "সরকারের এই নীতি দেশকে ধ্বংসের দিকে নিয়ে যাবে!",
+        "চমৎকার সিদ্ধান্ত! দেশের জন্য গর্বিত। ভালো চলবে!",
+        "রাজনীতির কোনো পরিবর্তন হবে না, সব একই রকম"
+    ]
+    example_cols = st.columns(3)
+    for idx, example in enumerate(examples):
+        with example_cols[idx]:
+            if st.button(example[:40] + "..." if len(example) > 40 else example,
+                       use_container_width=True):
+                st.session_state.user_input = example
                 st.rerun()