"""Gradio demo: emotion classification for code-mixed Bangla/English text.

Pipeline, as implemented below:
  1. ``standardize_text_v2`` normalises the input word by word (dictionary
     lookup, fuzzy dictionary lookup, then online transliteration).
  2. Two sequence-classification checkpoints score the normalised text and
     their probabilities are blended with fixed weights.
  3. A small keyword rule layer overrides / adjusts the blend when joy and/or
     negation keywords are present.
"""

import asyncio
import difflib
import os
import re
import string
from functools import lru_cache

import gradio as gr
import numpy as np
import pandas as pd
import requests
import torch
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig

try:
    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
except Exception:
    # Best effort only: the app still works with whatever policy is active.
    pass

device = torch.device("cpu")

# Absolute fixed label space to avoid internal alignment corruptions
universal_labels = ['fear', 'anger', 'sadness', 'disgust', 'joy', 'surprise']
fixed_label2id = {lbl: idx for idx, lbl in enumerate(universal_labels)}
fixed_id2label = {idx: lbl for idx, lbl in enumerate(universal_labels)}

# Labels as shown in the UI (capitalised form of ``universal_labels``).
DISPLAY_LABELS = ['Fear', 'Anger', 'Sadness', 'Disgust', 'Joy', 'Surprise']

# Secure dictionary loading: prefer a local copy, then the Drive path,
# otherwise fall back to a small built-in word list.
dictionary_file = 'Dictionary_BN_EN_61208.xlsx'
shortcut_path = '/content/drive/MyDrive/final/' + dictionary_file

if os.path.exists(dictionary_file):
    dict_path = dictionary_file
elif os.path.exists(shortcut_path):
    dict_path = shortcut_path
else:
    dict_path = None

if dict_path:
    df_dict = pd.read_excel(dict_path)
    # Drop rows with an empty cell in either of the first two columns;
    # otherwise astype(str) would turn them into the literal word "nan".
    _pairs = df_dict.iloc[:, :2].dropna()
    en_words = _pairs.iloc[:, 0].astype(str).str.strip().str.lower()
    bn_words = _pairs.iloc[:, 1].astype(str).str.strip()
    mapping_dict = dict(zip(en_words, bn_words))
else:
    mapping_dict = {
        "ami": "আমি", "amar": "আমার", "amake": "আমাকে", "tumi": "তুমি", "tomar": "তোমার",
        "apni": "আপনি", "she": "সে", "na": "না", "ni": "নি", "nai": "নাই", "valo": "ভালো",
        "bhalo": "ভালো", "khub": "খুব", "ajke": "আজকে", "kosto": "কষ্ট", "baje": "বাজে",
        "ranna": "রান্না", "kharap": "খারাপ"
    }

dict_keys = list(mapping_dict.keys())

# Everything except ASCII lowercase letters, digits, the Bengali Unicode block
# and whitespace is deleted during standardisation.
_DISALLOWED_CHARS = re.compile(r'[^a-z0-9\u0980-\u09FF\s]')
_LATIN_WORD = re.compile(r'^[a-z]+$')

# Punctuation stripped from token edges before keyword matching
# (ASCII punctuation plus the danda / double danda sentence marks).
_EDGE_PUNCTUATION = string.punctuation + "।॥"

# Keyword lists for the rule layer. Matching is done on whole tokens.
NEGATION_TOKENS = frozenset(['na', 'ni', 'nai', 'না', 'নি', 'নয়', 'নাই', 'হয়নি'])
JOY_TOKENS = frozenset([
    'valo', 'bhalo', 'ভালো', 'ভালোই', 'ভাল', 'khushi', 'খুশি',
    'sundor', 'সুন্দর', 'anondo', 'आनंद', 'আনন্দ',
])

# Ensemble weights (BanglaBERT vs XLM-RoBERTa).
FINAL_W_BB = 0.87
FINAL_W_ROB = 0.13


@lru_cache(maxsize=4096)
def _fetch_transliteration(word):
    """Query the Google Input Tools endpoint for *word*; raise on failure.

    Raising (instead of returning the input) keeps failed lookups out of the
    cache so a transient network error is retried on the next call.
    """
    response = requests.get(
        "https://inputtools.google.com/request",
        params={
            "text": word,
            "itc": "bn-t-i0-und",
            "num": 1,
            "cp": 0,
            "cs": 1,
            "ie": "utf-8",
            "oe": "utf-8",
            "app": "demopage",
        },
        timeout=2,
    )
    result = response.json()
    if result[0] != 'SUCCESS':
        raise LookupError(f"transliteration failed for {word!r}")
    return result[1][0][1][0]


def transliterate_word(word):
    """Return the first transliteration candidate for *word*.

    Falls back to returning *word* unchanged when the request fails, times
    out, or the response does not have the expected shape.
    """
    try:
        return _fetch_transliteration(word)
    except (requests.RequestException, ValueError, LookupError, TypeError):
        return word


@lru_cache(maxsize=8192)
def _closest_dictionary_key(word):
    """Return the closest dictionary key to *word* (cutoff 0.8), or None."""
    matches = difflib.get_close_matches(word, dict_keys, n=1, cutoff=0.8)
    return matches[0] if matches else None


def standardize_text_v2(text):
    """Normalise *text* word by word into the form fed to the models.

    The text is lower-cased and stripped of disallowed characters. Each word
    is then replaced by, in order of preference: its exact dictionary entry,
    the entry of its closest dictionary key, or (for purely a-z words) its
    online transliteration. Anything else is kept as is.

    Returns "" when *text* is not a string.
    """
    if not isinstance(text, str):
        return ""

    text = _DISALLOWED_CHARS.sub('', text.lower())

    cleaned_words = []
    for w in text.split():
        if w in mapping_dict:
            cleaned_words.append(mapping_dict[w])
            continue

        close_key = _closest_dictionary_key(w)
        if close_key is not None:
            cleaned_words.append(mapping_dict[close_key])
        elif _LATIN_WORD.match(w):
            cleaned_words.append(transliterate_word(w))
        else:
            cleaned_words.append(w)

    return " ".join(cleaned_words)


print("Loading models with configuration overrides...")

# Secure initialization for BanglaBERT
bb_config = AutoConfig.from_pretrained("Akash751/banglabert-code-mixed-emotion")
bb_config.num_labels = 6
bb_config.label2id = fixed_label2id
bb_config.id2label = fixed_id2label

bb_tokenizer = AutoTokenizer.from_pretrained("csebuetnlp/banglabert")
bb_model = AutoModelForSequenceClassification.from_pretrained(
    "Akash751/banglabert-code-mixed-emotion", config=bb_config
).to(device)

# Secure initialization for XLM-RoBERTa
rob_config = AutoConfig.from_pretrained("Akash751/roberta-code-mixed-emotion")
rob_config.num_labels = 6
rob_config.label2id = fixed_label2id
rob_config.id2label = fixed_id2label

rob_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
rob_model = AutoModelForSequenceClassification.from_pretrained(
    "Akash751/roberta-code-mixed-emotion", config=rob_config
).to(device)


def _empty_scores():
    """Return a fresh all-zero score dict over the display labels."""
    return {label: 0 for label in DISPLAY_LABELS}


def _class_probabilities(tokenizer, model, text):
    """Run one classifier on *text* and return {display label: probability}."""
    inputs = tokenizer(
        text, return_tensors="pt", padding=True, truncation=True, max_length=128
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = softmax(logits.cpu().numpy(), axis=1)[0]
    return {
        label.capitalize(): float(prob)
        for label, prob in zip(universal_labels, probs)
    }


def _keyword_tokens(text):
    """Split *text* on whitespace and strip punctuation from token edges."""
    stripped = (piece.strip(_EDGE_PUNCTUATION) for piece in text.split())
    return {piece for piece in stripped if piece}


def predict_emotion(user_text):
    """Classify *user_text* and return ``(clean_text, scores)``.

    ``clean_text`` is the standardised text the models saw; ``scores`` maps
    each display label to its final score. Empty or whitespace-only input
    (or input that standardises to nothing) yields ``("", all-zero scores)``.
    """
    if not user_text or not user_text.strip():
        return "", _empty_scores()

    clean_text = standardize_text_v2(user_text)
    if not clean_text:
        return "", _empty_scores()

    # 1. Base model inference pass
    bb_dict = _class_probabilities(bb_tokenizer, bb_model, clean_text)
    rob_dict = _class_probabilities(rob_tokenizer, rob_model, clean_text)

    # 2. Optimized ensemble fusion (0.87 vs 0.13)
    result_dict = {
        label: (bb_dict.get(label, 0.0) * FINAL_W_BB)
        + (rob_dict.get(label, 0.0) * FINAL_W_ROB)
        for label in DISPLAY_LABELS
    }

    # 3. Double-layer intent engine.
    # Keywords are matched against whole tokens of both the raw and the
    # standardised text. Substring matching would fire on unrelated words,
    # e.g. "ni" inside "apni" or "na" inside "ranna".
    tokens = _keyword_tokens(user_text.lower()) | _keyword_tokens(clean_text)
    has_negation = not NEGATION_TOKENS.isdisjoint(tokens)
    has_joy_base = not JOY_TOKENS.isdisjoint(tokens)

    if has_negation and has_joy_base:
        # LAYER A: a joy word together with a negation
        # (e.g. "bhalo lagche na") -> Sadness.
        for k in result_dict:
            result_dict[k] = 0.0
        result_dict['Sadness'] = 1.0
    elif has_joy_base:
        # LAYER B: a joy word with no negation (e.g. "ajke bhalo lagche")
        # -> lock the prediction to Joy.
        for k in result_dict:
            result_dict[k] = 0.0
        result_dict['Joy'] = 1.0
    elif has_negation:
        # LAYER C: general negation filtering for the remaining cases.
        # When Fear or Anger dominates, move half of each into Sadness and
        # damp the originals.
        if result_dict['Fear'] > 0.4 or result_dict['Anger'] > 0.4:
            old_fear = result_dict['Fear']
            old_anger = result_dict['Anger']
            result_dict['Sadness'] += (old_fear * 0.5) + (old_anger * 0.5)
            result_dict['Fear'] *= 0.1
            result_dict['Anger'] *= 0.1

    # Renormalise so the scores sum to 1 after any adjustment.
    total = sum(result_dict.values())
    if total > 0:
        for k in result_dict:
            result_dict[k] = result_dict[k] / total

    return clean_text, result_dict


# 4. Pure custom Blocks interface (strictly no examples section)
with gr.Blocks(title="🧠🎭 Code-Mixed Emotion Classifier") as demo:
    gr.Markdown("# 🧠🎭 Code-Mixed Emotion Classifier (Context-Aware)")
    gr.Markdown("এটি **BanglaBERT** (87%) এবং **XLM-RoBERTa** (13%)-এর সমন্বয়ে তৈরি একটি গাণিতিকভাবে অপ্টিমাইজড এনসেম্বল সিস্টেম।")

    with gr.Row():
        with gr.Column():
            input_box = gr.Textbox(lines=4, placeholder="Type your code-mixed sentence here...", label="Input Code-Mixed Text")
            submit_btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            output_text = gr.Textbox(label="🔍 Text seen by Model (Fuzzy + Transliteration)")
            output_label = gr.Label(num_top_classes=6, label="Predicted Emotion (With Intelligent Context Mapping)")

    submit_btn.click(fn=predict_emotion, inputs=input_box, outputs=[output_text, output_label])

demo.launch(show_error=True, max_threads=10)