rocky250 committed on
Commit
1f375b8
·
verified ·
1 Parent(s): da258d7

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +34 -142
src/streamlit_app.py CHANGED
@@ -13,14 +13,11 @@ class BanglaPoliticalNet(nn.Module):
13
  def __init__(self, num_classes=5):
14
  super().__init__()
15
  self.banglabert = AutoModel.from_pretrained("csebuetnlp/banglabert")
16
-
17
  self.hidden_size = self.banglabert.config.hidden_size
18
-
19
  self.cnn_layers = nn.ModuleList([
20
  nn.Conv1d(self.hidden_size, 128, kernel_size=k, padding=k//2)
21
  for k in [3,5,7]
22
  ])
23
-
24
  self.attention = nn.MultiheadAttention(self.hidden_size, 8, batch_first=True)
25
  self.classifier = nn.Sequential(
26
  nn.LayerNorm(self.hidden_size),
@@ -32,27 +29,20 @@ class BanglaPoliticalNet(nn.Module):
32
  nn.GELU(),
33
  nn.Linear(256, num_classes)
34
  )
35
-
36
  self.explainability_weights = nn.Parameter(torch.ones(num_classes) * 0.1)
37
 
38
  def forward(self, input_ids, attention_mask=None):
39
  bert_out = self.banglabert(input_ids, attention_mask=attention_mask).last_hidden_state
40
-
41
  cnn_outs = [F.relu(cnn(bert_out.transpose(1,2)).transpose(1,2)) for cnn in self.cnn_layers]
42
  cnn_concat = torch.cat(cnn_outs, dim=-1)
43
-
44
  if not hasattr(self, 'cnn_proj'):
45
-
46
  self.cnn_proj = nn.Linear(384, self.hidden_size).to(input_ids.device)
47
-
48
  attn_input = self.cnn_proj(cnn_concat)
49
  attn_out, _ = self.attention(attn_input, attn_input, attn_input)
50
  pooled = attn_out[:, 0, :]
51
-
52
  logits = self.classifier(pooled)
53
  return logits, self.explainability_weights
54
 
55
-
56
  st.markdown("""
57
  <style>
58
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
@@ -219,55 +209,35 @@ h1, h2, h3 {
219
  """, unsafe_allow_html=True)
220
 
221
  id2label = {0: 'Very Negative', 1: 'Negative', 2: 'Neutral', 3: 'Positive', 4: 'Very Positive'}
222
- label_colors = {
223
- 'Very Negative': '#ef4444',
224
- 'Negative': '#f97316',
225
- 'Neutral': '#64748b',
226
- 'Positive': '#22c55e',
227
- 'Very Positive': '#16a34a'
228
- }
229
 
230
  @st.cache_resource
231
  def load_models():
232
  models_loaded = {}
233
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
234
-
235
  standard_models = {
236
  "model_banglabert": "rocky250/Sentiment-banglabert",
237
  "model_mbert": "rocky250/Sentiment-mbert",
238
  "model_bbase": "rocky250/Sentiment-bbase",
239
  "model_xlmr": "rocky250/Sentiment-xlmr"
240
  }
241
-
242
  for name, repo in standard_models.items():
243
  try:
244
  tokenizer = AutoTokenizer.from_pretrained(repo)
245
  model = AutoModelForSequenceClassification.from_pretrained(repo)
246
  models_loaded[name] = (tokenizer, model.to(device))
247
- except Exception as e:
248
- print(f"Skipped {name}: {e}")
249
  continue
250
-
251
  try:
252
-
253
  model_path = hf_hub_download(repo_id="rocky250/bangla-political", filename="pytorch_model.bin")
254
-
255
-
256
  tokenizer = AutoTokenizer.from_pretrained("rocky250/bangla-political")
257
-
258
-
259
  model = BanglaPoliticalNet(num_classes=5)
260
-
261
-
262
  if not hasattr(model, 'cnn_proj'):
263
  model.cnn_proj = nn.Linear(384, model.hidden_size)
264
-
265
  model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
266
-
267
  models_loaded["bangla_political"] = (tokenizer, model.to(device))
268
- except Exception as e:
269
- print(f"Skipped bangla_political: {e}")
270
-
271
  return models_loaded
272
 
273
  models_dict = load_models()
@@ -275,28 +245,23 @@ models_dict = load_models()
275
  def predict_single_model(text, model_name):
276
  clean_text = normalize(text)
277
  tokenizer, model = models_dict[model_name]
278
-
279
  device = next(model.parameters()).device
280
  inputs = tokenizer(clean_text, return_tensors="pt", truncation=True, padding=True, max_length=128).to(device)
281
-
282
  with torch.no_grad():
283
  if isinstance(model, BanglaPoliticalNet):
284
  logits, _ = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
285
  else:
286
  outputs = model(**inputs)
287
  logits = outputs.logits
288
-
289
  probs = F.softmax(logits, dim=1).cpu().numpy()[0]
290
  pred_id = np.argmax(probs)
291
  prediction = id2label[pred_id]
292
-
293
  return prediction, probs
294
 
295
  def predict_ensemble(text):
296
  clean_text = normalize(text)
297
  all_probs = []
298
  all_predictions = []
299
-
300
  for name in models_dict.keys():
301
  try:
302
  pred, probs = predict_single_model(clean_text, name)
@@ -304,144 +269,71 @@ def predict_ensemble(text):
304
  all_predictions.append(pred)
305
  except:
306
  continue
307
-
308
  if all_probs:
309
  avg_probs = np.mean(all_probs, axis=0)
310
  final_pred = id2label[np.argmax(avg_probs)]
311
  return final_pred, all_predictions, avg_probs
312
  return "Error", [], np.zeros(5)
313
 
314
-
315
  st.markdown("""
316
- <div style='
317
- text-align: center;
318
- background: rgba(255,255,255,0.1);
319
- padding: 30px;
320
- border-radius: 20px;
321
- margin-bottom: 30px;
322
- backdrop-filter: blur(20px);
323
- '>
324
  <h1 style='font-size: 3.5rem; margin: 0; background: linear-gradient(45deg, #ffffff, #e2e8f0); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800;'>Political Sentiment Analysis</h1>
325
  </div>
326
  """, unsafe_allow_html=True)
327
 
328
  col1, col2 = st.columns([3, 1])
329
  with col1:
330
- user_input = st.text_area("Enter Bengali political text:", height=140,
331
- placeholder="এই বক্সে বাংলা রাজনৈতিক মন্তব্য লিখুন...",
332
- help="Type or paste Bengali political text for sentiment analysis")
333
-
334
  with col2:
335
  st.markdown("<div style='height: 20px'></div>", unsafe_allow_html=True)
336
- mode = st.radio("Analysis Mode:",
337
- ["Single Model", "Ensemble"],
338
- horizontal=True)
339
-
340
  selected_model = None
341
  if mode == "Single Model":
342
  model_options = {name: name for name in models_dict.keys()}
343
  if model_options:
344
  selected_model = st.selectbox("Select Model:", list(model_options.keys()), index=0)
345
- else:
346
- st.warning("No models loaded.")
347
 
348
  analyze_btn = st.button("ANALYZE SENTIMENT", type="primary", use_container_width=True)
349
 
350
  if analyze_btn and user_input.strip():
351
- with st.spinner('Processing with models...'):
352
  if mode == "Single Model":
353
  if selected_model:
354
  final_res, probs = predict_single_model(user_input, selected_model)
355
-
356
- col1, col2 = st.columns([1, 2])
357
- with col1:
358
- st.markdown(f"""
359
- <div class="main-card" style="border-top: 8px solid {label_colors[final_res]}">
360
- <div class="result-title">{selected_model}</div>
361
- <div class="result-value" style="color: {label_colors[final_res]}">{final_res}</div>
362
- <div style="font-size: 18px; color: #64748b; margin-top: 15px;">Confidence: {max(probs)*100:.1f}%</div>
363
- </div>
364
- """, unsafe_allow_html=True)
365
-
366
- with col2:
367
  st.markdown('<div class="section-header">Confidence Scores</div>', unsafe_allow_html=True)
368
  for i in range(5):
369
- label = id2label[i]
370
- prob = probs[i] * 100
371
- color = label_colors[label]
372
-
373
- st.markdown(f"""
374
- <div class="prob-row">
375
- <div class="prob-label">
376
- <span style="font-weight: 700;">{label}</span>
377
- <span style="font-weight: 700; color: {color};">{prob:.1f}%</span>
378
- </div>
379
- <div class="prob-bar-bg">
380
- <div class="prob-bar-fill" style="width: {min(prob, 100)}%; background: linear-gradient(90deg, {color}, {color}cc);"></div>
381
- </div>
382
- </div>
383
- """, unsafe_allow_html=True)
384
- else:
385
- st.error("Model not selected or failed to load.")
386
-
387
  else:
388
  final_res, all_votes, avg_probs = predict_ensemble(user_input)
389
-
390
- main_col, details_col = st.columns([1, 1.4])
391
-
392
- with main_col:
393
- st.markdown(f"""
394
- <div class="main-card" style="border-top: 8px solid {label_colors[final_res]}; box-shadow: 0 25px 50px rgba(0,0,0,0.2);">
395
- <div class="result-title" style="font-size: 18px;">ENSEMBLE CONSENSUS</div>
396
- <div class="result-value" style="color: {label_colors[final_res]}; font-size: 60px;">{final_res}</div>
397
- </div>
398
- """, unsafe_allow_html=True)
399
-
400
- st.markdown('<div class="section-header">Ensemble Probabilities</div>', unsafe_allow_html=True)
401
-
402
  for i in range(5):
403
- label = id2label[i]
404
- prob = avg_probs[i] * 100
405
- color = label_colors[label]
406
-
407
- st.markdown(f"""
408
- <div class="prob-row">
409
- <div class="prob-label">
410
- <span>{label}</span>
411
- <span style="color: {color};">{prob:.1f}%</span>
412
- </div>
413
- <div class="prob-bar-bg">
414
- <div class="prob-bar-fill" style="width: {min(prob, 100)}%; background: linear-gradient(90deg, {color}, {color}cc);"></div>
415
- </div>
416
- </div>
417
- """, unsafe_allow_html=True)
418
-
419
- with details_col:
420
  st.markdown('<div class="section-header">Individual Model Votes</div>', unsafe_allow_html=True)
421
- model_cols = st.columns(2)
422
  for idx, (name, vote) in enumerate(zip(list(models_dict.keys()), all_votes)):
423
- with model_cols[idx % 2]:
424
- color = label_colors[vote]
425
- st.markdown(f"""
426
- <div class="model-card">
427
- <div class="model-name">{name}</div>
428
- <div style="color: {color}; font-weight: 800; font-size: 24px; margin-top: 8px;">{vote}</div>
429
- </div>
430
- """, unsafe_allow_html=True)
431
-
432
  elif analyze_btn and not user_input.strip():
433
  st.error("অনুগ্রহ করে কিছু টেক্সট লিখুন!")
434
 
435
- with st.expander("Example Political Texts", expanded=False):
436
- examples = [
437
- "সরকারের এই নীতি দেশকে ধ্বংসের দিকে নিয়ে যাবে!",
438
- "চমৎকার সিদ্ধান্ত! দেশের জন্য গর্বিত। ভালো চলবে!",
439
- "রাজনীতির কোনো পরিবর্তন হবে না, সব একই রকম"
440
- ]
441
- example_cols = st.columns(3)
442
- for idx, example in enumerate(examples):
443
- with example_cols[idx]:
444
- if st.button(example[:40] + "..." if len(example) > 40 else example,
445
- use_container_width=True):
446
- st.session_state.user_input = example
447
  st.rerun()
 
13
  def __init__(self, num_classes=5):
14
  super().__init__()
15
  self.banglabert = AutoModel.from_pretrained("csebuetnlp/banglabert")
 
16
  self.hidden_size = self.banglabert.config.hidden_size
 
17
  self.cnn_layers = nn.ModuleList([
18
  nn.Conv1d(self.hidden_size, 128, kernel_size=k, padding=k//2)
19
  for k in [3,5,7]
20
  ])
 
21
  self.attention = nn.MultiheadAttention(self.hidden_size, 8, batch_first=True)
22
  self.classifier = nn.Sequential(
23
  nn.LayerNorm(self.hidden_size),
 
29
  nn.GELU(),
30
  nn.Linear(256, num_classes)
31
  )
 
32
  self.explainability_weights = nn.Parameter(torch.ones(num_classes) * 0.1)
33
 
34
  def forward(self, input_ids, attention_mask=None):
35
  bert_out = self.banglabert(input_ids, attention_mask=attention_mask).last_hidden_state
 
36
  cnn_outs = [F.relu(cnn(bert_out.transpose(1,2)).transpose(1,2)) for cnn in self.cnn_layers]
37
  cnn_concat = torch.cat(cnn_outs, dim=-1)
 
38
  if not hasattr(self, 'cnn_proj'):
 
39
  self.cnn_proj = nn.Linear(384, self.hidden_size).to(input_ids.device)
 
40
  attn_input = self.cnn_proj(cnn_concat)
41
  attn_out, _ = self.attention(attn_input, attn_input, attn_input)
42
  pooled = attn_out[:, 0, :]
 
43
  logits = self.classifier(pooled)
44
  return logits, self.explainability_weights
45
 
 
46
  st.markdown("""
47
  <style>
48
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
 
209
  """, unsafe_allow_html=True)
210
 
211
  id2label = {0: 'Very Negative', 1: 'Negative', 2: 'Neutral', 3: 'Positive', 4: 'Very Positive'}
212
+ label_colors = { 'Very Negative': '#ef4444', 'Negative': '#f97316', 'Neutral': '#64748b', 'Positive': '#22c55e', 'Very Positive': '#16a34a' }
 
 
 
 
 
 
213
 
214
  @st.cache_resource
215
  def load_models():
216
  models_loaded = {}
217
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
218
  standard_models = {
219
  "model_banglabert": "rocky250/Sentiment-banglabert",
220
  "model_mbert": "rocky250/Sentiment-mbert",
221
  "model_bbase": "rocky250/Sentiment-bbase",
222
  "model_xlmr": "rocky250/Sentiment-xlmr"
223
  }
 
224
  for name, repo in standard_models.items():
225
  try:
226
  tokenizer = AutoTokenizer.from_pretrained(repo)
227
  model = AutoModelForSequenceClassification.from_pretrained(repo)
228
  models_loaded[name] = (tokenizer, model.to(device))
229
+ except:
 
230
  continue
 
231
  try:
 
232
  model_path = hf_hub_download(repo_id="rocky250/bangla-political", filename="pytorch_model.bin")
 
 
233
  tokenizer = AutoTokenizer.from_pretrained("rocky250/bangla-political")
 
 
234
  model = BanglaPoliticalNet(num_classes=5)
 
 
235
  if not hasattr(model, 'cnn_proj'):
236
  model.cnn_proj = nn.Linear(384, model.hidden_size)
 
237
  model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
 
238
  models_loaded["bangla_political"] = (tokenizer, model.to(device))
239
+ except:
240
+ pass
 
241
  return models_loaded
242
 
243
  models_dict = load_models()
 
245
  def predict_single_model(text, model_name):
246
  clean_text = normalize(text)
247
  tokenizer, model = models_dict[model_name]
 
248
  device = next(model.parameters()).device
249
  inputs = tokenizer(clean_text, return_tensors="pt", truncation=True, padding=True, max_length=128).to(device)
 
250
  with torch.no_grad():
251
  if isinstance(model, BanglaPoliticalNet):
252
  logits, _ = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
253
  else:
254
  outputs = model(**inputs)
255
  logits = outputs.logits
 
256
  probs = F.softmax(logits, dim=1).cpu().numpy()[0]
257
  pred_id = np.argmax(probs)
258
  prediction = id2label[pred_id]
 
259
  return prediction, probs
260
 
261
  def predict_ensemble(text):
262
  clean_text = normalize(text)
263
  all_probs = []
264
  all_predictions = []
 
265
  for name in models_dict.keys():
266
  try:
267
  pred, probs = predict_single_model(clean_text, name)
 
269
  all_predictions.append(pred)
270
  except:
271
  continue
 
272
  if all_probs:
273
  avg_probs = np.mean(all_probs, axis=0)
274
  final_pred = id2label[np.argmax(avg_probs)]
275
  return final_pred, all_predictions, avg_probs
276
  return "Error", [], np.zeros(5)
277
 
 
278
  st.markdown("""
279
+ <div style='text-align: center; background: rgba(255,255,255,0.1); padding: 30px; border-radius: 20px; margin-bottom: 30px; backdrop-filter: blur(20px);'>
 
 
 
 
 
 
 
280
  <h1 style='font-size: 3.5rem; margin: 0; background: linear-gradient(45deg, #ffffff, #e2e8f0); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800;'>Political Sentiment Analysis</h1>
281
  </div>
282
  """, unsafe_allow_html=True)
283
 
284
  col1, col2 = st.columns([3, 1])
285
  with col1:
286
+ user_input = st.text_area("Enter Bengali political text:", height=140, placeholder="এই বক্সে বাংলা রাজনৈতিক মন্তব্য লিখুন...")
 
 
 
287
  with col2:
288
  st.markdown("<div style='height: 20px'></div>", unsafe_allow_html=True)
289
+ mode = st.radio("Analysis Mode:", ["Single Model", "Ensemble"], horizontal=True)
 
 
 
290
  selected_model = None
291
  if mode == "Single Model":
292
  model_options = {name: name for name in models_dict.keys()}
293
  if model_options:
294
  selected_model = st.selectbox("Select Model:", list(model_options.keys()), index=0)
 
 
295
 
296
  analyze_btn = st.button("ANALYZE SENTIMENT", type="primary", use_container_width=True)
297
 
298
  if analyze_btn and user_input.strip():
299
+ with st.spinner('Processing...'):
300
  if mode == "Single Model":
301
  if selected_model:
302
  final_res, probs = predict_single_model(user_input, selected_model)
303
+ c1, c2 = st.columns([1, 2])
304
+ with c1:
305
+ st.markdown(f'<div class="main-card" style="border-top: 8px solid {label_colors[final_res]}"><div class="result-title">{selected_model}</div><div class="result-value" style="color: {label_colors[final_res]}">{final_res}</div><div style="font-size: 18px; color: #64748b; margin-top: 15px;">Confidence: {max(probs)*100:.1f}%</div></div>', unsafe_allow_html=True)
306
+ with c2:
 
 
 
 
 
 
 
 
307
  st.markdown('<div class="section-header">Confidence Scores</div>', unsafe_allow_html=True)
308
  for i in range(5):
309
+ lbl = id2label[i]
310
+ p = probs[i] * 100
311
+ clr = label_colors[lbl]
312
+ st.markdown(f'<div class="prob-row"><div class="prob-label"><span>{lbl}</span><span style="color: {clr};">{p:.1f}%</span></div><div class="prob-bar-bg"><div class="prob-bar-fill" style="width: {min(p, 100)}%; background: {clr};"></div></div></div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  else:
314
  final_res, all_votes, avg_probs = predict_ensemble(user_input)
315
+ mc, dc = st.columns([1, 1.4])
316
+ with mc:
317
+ st.markdown(f'<div class="main-card" style="border-top: 8px solid {label_colors[final_res]}"><div class="result-title">ENSEMBLE CONSENSUS</div><div class="result-value" style="color: {label_colors[final_res]}">{final_res}</div></div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
318
  for i in range(5):
319
+ lbl = id2label[i]
320
+ p = avg_probs[i] * 100
321
+ clr = label_colors[lbl]
322
+ st.markdown(f'<div class="prob-row"><div class="prob-label"><span>{lbl}</span><span style="color: {clr};">{p:.1f}%</span></div><div class="prob-bar-bg"><div class="prob-bar-fill" style="width: {min(p, 100)}%; background: {clr};"></div></div></div>', unsafe_allow_html=True)
323
+ with dc:
 
 
 
 
 
 
 
 
 
 
 
 
324
  st.markdown('<div class="section-header">Individual Model Votes</div>', unsafe_allow_html=True)
325
+ m_cols = st.columns(2)
326
  for idx, (name, vote) in enumerate(zip(list(models_dict.keys()), all_votes)):
327
+ with m_cols[idx % 2]:
328
+ st.markdown(f'<div class="model-card"><div class="model-name">{name}</div><div style="color: {label_colors[vote]}; font-weight: 800; font-size: 24px;">{vote}</div></div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
329
  elif analyze_btn and not user_input.strip():
330
  st.error("অনুগ্রহ করে কিছু টেক্সট লিখুন!")
331
 
332
+ with st.expander("Example Political Texts"):
333
+ examples = ["সরকারের এই নীতি দেশকে ধ্বংসের দিকে নিয়ে যাবে!", "চমৎকার সিদ্ধান্ত! দেশের জন্য গর্বিত।", "রাজনীতির কোনো পরিবর্তন হবে না"]
334
+ ex_cols = st.columns(3)
335
+ for idx, ex in enumerate(examples):
336
+ with ex_cols[idx]:
337
+ if st.button(ex[:30] + "...", use_container_width=True):
338
+ st.session_state.user_input = ex
 
 
 
 
 
339
  st.rerun()