"""Streamlit app for Bengali political sentiment analysis.

Loads several fine-tuned transformer classifiers from the Hugging Face Hub
and lets the user score Bengali political text either with a single model or
with an averaged-probability ensemble of every model that loaded successfully.
"""

import numpy as np
import streamlit as st
import torch
import torch.nn as nn
import torch.nn.functional as F
from normalizer import normalize
from transformers import AutoModel, AutoModelForSequenceClassification, AutoTokenizer

st.set_page_config(page_title="Political Sentiment", layout="wide")


class BanglaPoliticalNet(nn.Module):
    """BanglaBERT encoder + multi-kernel CNN + self-attention classifier head.

    NOTE(review): this class is defined but never instantiated in this file;
    inference below uses ``AutoModelForSequenceClassification`` checkpoints.
    """

    def __init__(self, num_classes=5):
        super().__init__()
        self.banglabert = AutoModel.from_pretrained("csebuetnlp/banglabert")
        self.hidden_size = self.banglabert.config.hidden_size
        self.kernel_sizes = [3, 5, 7]
        self.cnn_channels = 128
        # One Conv1d per kernel size; padding=k//2 keeps the sequence length.
        self.cnn_layers = nn.ModuleList([
            nn.Conv1d(self.hidden_size, self.cnn_channels, kernel_size=k, padding=k // 2)
            for k in self.kernel_sizes
        ])
        # BUGFIX: this projection used to be created inside forward() with
        # fresh random weights on every call, so it could never be trained and
        # made outputs non-deterministic. It is now a registered submodule,
        # and its input width is derived from the CNN configuration instead of
        # the hard-coded 384 (= 3 kernels * 128 channels).
        self.proj = nn.Linear(self.cnn_channels * len(self.kernel_sizes), self.hidden_size)
        self.attention = nn.MultiheadAttention(self.hidden_size, 8, batch_first=True)
        self.classifier = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(self.hidden_size, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes),
        )

    def forward(self, input_ids, attention_mask=None):
        """Return (batch, num_classes) logits for a batch of token ids."""
        bert_out = self.banglabert(input_ids, attention_mask=attention_mask).last_hidden_state
        # Conv1d expects (batch, channels, seq); transpose in and back out.
        cnn_features = [
            F.relu(cnn(bert_out.transpose(1, 2)).transpose(1, 2))
            for cnn in self.cnn_layers
        ]
        cnn_concat = torch.cat(cnn_features, dim=-1)
        attn_input = self.proj(cnn_concat)
        attn_out, _ = self.attention(attn_input, attn_input, attn_input)
        # Pool by taking the first token position (CLS-style pooling).
        attn_pooled = attn_out[:, 0, :]
        return self.classifier(attn_pooled)


# Placeholder for custom CSS (the original style block carried no content).
st.markdown("""
""", unsafe_allow_html=True)

id2label = {0: 'Very Negative', 1: 'Negative', 2: 'Neutral', 3: 'Positive', 4: 'Very Positive'}
label_colors = {
    'Very Negative': '#ef4444',
    'Negative': '#f97316',
    'Neutral': '#64748b',
    'Positive': '#22c55e',
    'Very Positive': '#16a34a',
}


@st.cache_resource
def load_models():
    """Download tokenizer/model pairs from the Hub, skipping repos that fail.

    Returns:
        dict mapping a short model name to ``(tokenizer, model)``; each model
        is moved to CUDA when available. Cached by Streamlit so the downloads
        happen once per process.
    """
    models_loaded = {}
    target_models = {
        "model_banglabert": "rocky250/Sentiment-banglabert",
        "model_mbert": "rocky250/Sentiment-mbert",
        "model_bbase": "rocky250/Sentiment-bbase",
        "model_xlmr": "rocky250/Sentiment-xlmr",
        "bangla_political": "rocky250/bangla-political",
    }
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    for name, repo in target_models.items():
        try:
            tokenizer = AutoTokenizer.from_pretrained(repo)
            model = AutoModelForSequenceClassification.from_pretrained(repo)
            models_loaded[name] = (tokenizer, model.to(device))
        except Exception:
            # Best-effort: one missing/broken repo must not take down the app.
            continue
    return models_loaded


models_dict = load_models()


def predict_single_model(text, model_name):
    """Classify ``text`` with one loaded model.

    Returns:
        (label, probs) where ``label`` is a string from ``id2label`` and
        ``probs`` is a length-5 numpy array of class probabilities.
    """
    clean_text = normalize(text)
    tokenizer, model = models_dict[model_name]
    device = next(model.parameters()).device
    inputs = tokenizer(
        clean_text, return_tensors="pt", truncation=True, padding=True, max_length=128
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    return id2label[int(np.argmax(probs))], probs


def predict_ensemble(text):
    """Average class probabilities over every loaded model.

    Returns:
        (final_label, votes, avg_probs) where ``votes`` is a list of
        ``(model_name, label)`` pairs — one per model that succeeded — and
        ``avg_probs`` is the length-5 mean probability array. On total
        failure returns ("Error", [], zeros(5)).
    """
    all_probs = []
    votes = []
    for name in models_dict:
        try:
            # predict_single_model normalizes internally, so pass the raw
            # text here (the old code normalized twice).
            pred, probs = predict_single_model(text, name)
            all_probs.append(probs)
            # BUGFIX: record the model name with its vote; the old code
            # zipped models_dict.keys() against a possibly-shorter vote list,
            # so one failed model shifted every later vote onto the wrong name.
            votes.append((name, pred))
        except Exception:
            continue
    if all_probs:
        avg_probs = np.mean(all_probs, axis=0)
        return id2label[int(np.argmax(avg_probs))], votes, avg_probs
    return "Error", [], np.zeros(5)


def _render_prob_bars(probs):
    """Render one colored percentage bar per sentiment class.

    NOTE(review): the original HTML for these bars was garbled in the source;
    this markup is an equivalent reconstruction (label, bar, percentage).
    """
    for i in range(5):
        label = id2label[i]
        pct = probs[i] * 100
        color = label_colors[label]
        st.markdown(
            f"<div style='display:flex; align-items:center; margin:4px 0;'>"
            f"<span style='width:110px;'>{label}</span>"
            f"<div style='flex:1; background:#e2e8f0; border-radius:4px;'>"
            f"<div style='width:{pct:.1f}%; background:{color}; height:10px;"
            f" border-radius:4px;'></div></div>"
            f"<span style='width:60px; text-align:right;'>{pct:.1f}%</span>"
            f"</div>",
            unsafe_allow_html=True,
        )


st.markdown("""
<h1 style='text-align:center;'>Political Sentiment Analysis</h1>
""", unsafe_allow_html=True)

if not models_dict:
    # Without at least one model the selectbox/predictions below cannot work.
    st.error("No models could be loaded. Please check your connection and restart.")
    st.stop()

col1, col2 = st.columns([3, 1])
with col1:
    # BUGFIX: key="user_input" lets the example buttons below pre-fill this
    # widget through st.session_state (previously the assignment had no
    # effect because the widget was keyless).
    user_input = st.text_area(
        "Enter Bengali political text:",
        height=140,
        placeholder="এই বক্সে বাংলা রাজনৈতিক মন্তব্য লিখুন...",
        help="Type or paste Bengali political text for sentiment analysis",
        key="user_input",
    )
with col2:
    st.markdown(" ", unsafe_allow_html=True)
    mode = st.radio("Analysis Mode:", ["Single Model", "Ensemble"], horizontal=True)
    selected_model = None
    if mode == "Single Model":
        selected_model = st.selectbox("Select Model:", list(models_dict.keys()), index=0)
    analyze_btn = st.button("ANALYZE SENTIMENT", type="primary", use_container_width=True)

if analyze_btn and user_input.strip():
    with st.spinner('Processing with models...'):
        if mode == "Single Model":
            final_res, probs = predict_single_model(user_input, selected_model)
            res_col, bars_col = st.columns([1, 2])
            with res_col:
                # NOTE(review): original result-card HTML was garbled in the
                # source; reconstructed with equivalent content.
                st.markdown(
                    f"<div style='text-align:center; padding:1rem; border-radius:8px;'>"
                    f"<div>{selected_model}</div>"
                    f"<h2 style='color:{label_colors[final_res]};'>{final_res}</h2>"
                    f"<div>Confidence: {max(probs) * 100:.1f}%</div>"
                    f"</div>",
                    unsafe_allow_html=True,
                )
            with bars_col:
                st.markdown('<h4>Confidence Scores</h4>', unsafe_allow_html=True)
                _render_prob_bars(probs)
        else:
            final_res, all_votes, avg_probs = predict_ensemble(user_input)
            main_col, details_col = st.columns([1, 1.4])
            with main_col:
                st.markdown(
                    f"<div style='text-align:center; padding:1rem; border-radius:8px;'>"
                    f"<div>ENSEMBLE CONSENSUS</div>"
                    f"<h2 style='color:{label_colors.get(final_res, '#64748b')};'>"
                    f"{final_res}</h2>"
                    f"</div>",
                    unsafe_allow_html=True,
                )
                st.markdown('<h4>Ensemble Probabilities</h4>', unsafe_allow_html=True)
                _render_prob_bars(avg_probs)
            with details_col:
                st.markdown('<h4>Individual Model Votes</h4>', unsafe_allow_html=True)
                model_cols = st.columns(2)
                # Votes arrive as (model_name, label) pairs, so names always
                # line up with the model that actually produced the vote.
                for idx, (name, vote) in enumerate(all_votes):
                    with model_cols[idx % 2]:
                        color = label_colors[vote]
                        st.markdown(
                            f"<div style='padding:0.5rem; margin:4px 0;"
                            f" border-radius:6px; text-align:center;'>"
                            f"<div>{name}</div>"
                            f"<strong style='color:{color};'>{vote}</strong>"
                            f"</div>",
                            unsafe_allow_html=True,
                        )
elif analyze_btn:
    # Button pressed with an empty/whitespace-only input.
    st.error("অনুগ্রহ করে কিছু টেক্সট লিখুন!")

with st.expander("Example Political Texts", expanded=False):
    examples = [
        "সরকারের এই নীতি দেশকে ধ্বংসের দিকে নিয়ে যাবে!",
        "চমৎকার সিদ্ধান্ত! দেশের জন্য গর্বিত। ভালো চলবে!",
        "রাজনীতির কোনো পরিবর্তন হবে না, সব একই রকম",
    ]
    example_cols = st.columns(3)
    for idx, example in enumerate(examples):
        with example_cols[idx]:
            btn_label = example[:40] + "..." if len(example) > 40 else example
            if st.button(btn_label, use_container_width=True):
                # Pre-fill the keyed text_area on the next run.
                st.session_state.user_input = example
                st.rerun()