| import gradio as gr |
| import pandas as pd |
| import numpy as np |
| import torch |
| import re |
| import difflib |
| import requests |
| import os |
| import asyncio |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig |
| from scipy.special import softmax |
|
|
# Best-effort reset of asyncio to its default event-loop policy.
# NOTE(review): the reason is not visible in this file -- presumably a
# compatibility workaround for the hosting environment; confirm before removing.
try:
    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
except Exception:
    # Deliberately non-fatal: the app can still start on whatever policy is
    # active. Unlike a bare ``except:``, KeyboardInterrupt and SystemExit
    # are allowed to propagate.
    pass


# All inference in this file is pinned to the CPU.
device = torch.device("cpu")


# Emotion classes; the list position of a label is its class id in the
# two mappings below.
universal_labels = ['fear', 'anger', 'sadness', 'disgust', 'joy', 'surprise']
fixed_label2id = {lbl: idx for idx, lbl in enumerate(universal_labels)}
fixed_id2label = {idx: lbl for idx, lbl in enumerate(universal_labels)}
|
|
| |
# Lookup workbook: first column is used as the (lower-cased) source word,
# second column as its replacement.
dictionary_file = 'Dictionary_BN_EN_61208.xlsx'
shortcut_path = '/content/drive/MyDrive/final/' + dictionary_file


# Prefer a copy next to the script, then the mounted-drive location.
if os.path.exists(dictionary_file):
    dict_path = dictionary_file
elif os.path.exists(shortcut_path):
    dict_path = shortcut_path
else:
    dict_path = None


if dict_path:
    df_dict = pd.read_excel(dict_path)
    # Drop rows with an empty cell in either column *before* casting to str;
    # otherwise NaN becomes the literal text "nan" and ends up in the mapping
    # (where it can even be picked up by the fuzzy matcher).
    _complete_rows = df_dict.iloc[:, :2].dropna()
    en_words = _complete_rows.iloc[:, 0].astype(str).str.strip().str.lower()
    bn_words = _complete_rows.iloc[:, 1].astype(str).str.strip()
    # Skip entries that are blank after stripping: an empty key can never be
    # looked up and an empty replacement would silently delete the word.
    mapping_dict = {en: bn for en, bn in zip(en_words, bn_words) if en and bn}
else:
    # Minimal built-in fallback used when no workbook is found.
    mapping_dict = {
        "ami": "আমি", "amar": "আমার", "amake": "আমাকে", "tumi": "তুমি", "tomar": "তোমার",
        "apni": "আপনি", "she": "সে", "na": "না", "ni": "নি", "nai": "নাই",
        "valo": "ভালো", "bhalo": "ভালো", "khub": "খুব", "ajke": "আজকে",
        "kosto": "কষ্ট", "baje": "বাজে", "ranna": "রান্না", "kharap": "খারাপ"
    }


# Candidate list handed to difflib for fuzzy matching.
dict_keys = list(mapping_dict)
|
|
def transliterate_word(word):
    """Transliterate a romanized word via the Google Input Tools endpoint.

    This is best-effort: on any network, decoding or payload-shape problem
    the input is returned unchanged.

    Args:
        word: The token to transliterate.

    Returns:
        The first candidate returned by the service, or ``word`` itself when
        the input is empty, the request fails, or the response is not usable.
    """
    if not word:
        # Nothing to send; avoid a pointless HTTP round trip.
        return word
    try:
        response = requests.get(
            "https://inputtools.google.com/request",
            # Passing params lets requests URL-encode the word instead of
            # splicing it raw into the query string.
            params={
                "text": word,
                "itc": "bn-t-i0-und",
                "num": 1,
                "cp": 0,
                "cs": 1,
                "ie": "utf-8",
                "oe": "utf-8",
                "app": "demopage",
            },
            timeout=2,
        )
        result = response.json()
        if result[0] == 'SUCCESS':
            candidate = result[1][0][1][0]
            # Only accept a real, non-empty string so callers can join it.
            if isinstance(candidate, str) and candidate:
                return candidate
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Network/timeout errors, invalid JSON, or an unexpected payload
        # shape: fall through to the untouched input.
        pass
    return word
|
|
def _standardize_token(token):
    """Resolve one cleaned token: exact lookup, fuzzy lookup, then transliteration."""
    if token in mapping_dict:
        return mapping_dict[token]

    # Closest dictionary key with similarity >= 0.8, if any.
    matches = difflib.get_close_matches(token, dict_keys, n=1, cutoff=0.8)
    if matches:
        return mapping_dict[matches[0]]

    # Only purely Latin-letter tokens are sent for transliteration; digits,
    # mixed tokens and Bengali-script tokens pass through unchanged.
    if re.match(r'^[a-z]+$', token):
        return transliterate_word(token)
    return token


def standardize_text_v2(text):
    """Normalize code-mixed text into the form fed to the classifiers.

    The text is lower-cased, every character other than ``a-z``, ``0-9``,
    the Bengali block (U+0980-U+09FF) and whitespace is removed, and each
    remaining token is replaced via the dictionary (exact, then fuzzy) or
    transliterated.

    Args:
        text: Raw user input.

    Returns:
        The space-joined standardized tokens, or ``""`` when ``text`` is not
        a string or nothing survives cleaning.
    """
    if not isinstance(text, str):
        return ""

    text = re.sub(r'[^a-z0-9\u0980-\u09FF\s]', '', text.lower())

    # Per-call memo: a repeated token would otherwise redo the full fuzzy
    # search over dict_keys and possibly another HTTP request. It is kept
    # local to the call so a transient network failure is never remembered
    # across requests.
    resolved = {}
    cleaned_words = []
    for w in text.split():
        if w not in resolved:
            resolved[w] = _standardize_token(w)
        cleaned_words.append(resolved[w])

    return " ".join(cleaned_words)
|
|
def _load_emotion_classifier(checkpoint, tokenizer_name):
    """Load config, tokenizer and classification model for one ensemble member.

    The checkpoint's config is overridden with the six fixed emotion labels
    before the model weights are loaded, and the model is moved to ``device``.
    """
    config = AutoConfig.from_pretrained(checkpoint)
    config.num_labels = 6
    config.label2id = fixed_label2id
    config.id2label = fixed_id2label

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        checkpoint, config=config
    ).to(device)
    return config, tokenizer, model


print("Loading models with configuration overrides...")

# BanglaBERT member: fine-tuned checkpoint paired with the base tokenizer.
bb_config, bb_tokenizer, bb_model = _load_emotion_classifier(
    "Akash751/banglabert-code-mixed-emotion",
    "csebuetnlp/banglabert",
)

# XLM-RoBERTa member: fine-tuned checkpoint paired with the base tokenizer.
rob_config, rob_tokenizer, rob_model = _load_emotion_classifier(
    "Akash751/roberta-code-mixed-emotion",
    "xlm-roberta-base",
)
|
|
# Labels shown in the UI, in the order they are reported.
_DISPLAY_LABELS = ('Fear', 'Anger', 'Sadness', 'Disgust', 'Joy', 'Surprise')

# Ensemble weights applied to each model's probabilities.
_BANGLABERT_WEIGHT = 0.87
_XLMR_WEIGHT = 0.13

# Whole-word cues for the rule-based overrides.
_NEGATION_TOKENS = frozenset(['na', 'ni', 'nai', 'না', 'নি', 'নয়', 'নাই', 'হয়নি'])
_JOY_TOKENS = frozenset([
    'valo', 'bhalo', 'ভালো', 'ভালোই', 'ভাল', 'khushi', 'খুশি',
    'sundor', 'সুন্দর', 'anondo',
    'आनंद',   # Devanagari spelling kept from the original list
    'আনন্দ',  # Bengali spelling that 'anondo' actually corresponds to
])

# Word tokenizer for cue detection: Latin letters, digits, and the
# Devanagari + Bengali blocks (U+0900-U+09FF) so every cue above can match.
_TOKEN_RE = re.compile(r'[a-z0-9\u0900-\u09FF]+')


def _zero_scores():
    """Return an all-zero score dict used for empty or unusable input."""
    return {label: 0 for label in _DISPLAY_LABELS}


def _model_probabilities(tokenizer, model, text):
    """Run one classifier on ``text`` and return its softmax probabilities."""
    inputs = tokenizer(
        text, return_tensors="pt", padding=True, truncation=True, max_length=128
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    return softmax(logits.cpu().numpy(), axis=1)[0]


def _apply_context_rules(scores, user_text, clean_text):
    """Apply the negation/joy keyword overrides to the ensemble scores."""
    # Match cues against whole tokens of both the raw and the standardized
    # text. Substring matching made 'na' fire inside "ranna" and 'ni' inside
    # "apni", which forced Sadness on unrelated sentences.
    tokens = set(_TOKEN_RE.findall(f"{user_text.lower()} {clean_text.lower()}"))
    has_negation = not tokens.isdisjoint(_NEGATION_TOKENS)
    has_joy_base = not tokens.isdisjoint(_JOY_TOKENS)

    if has_joy_base:
        # A negated joy word is reported as Sadness, a plain one as Joy.
        forced = 'Sadness' if has_negation else 'Joy'
        return {label: (1.0 if label == forced else 0.0) for label in scores}

    if has_negation and (scores['Fear'] > 0.4 or scores['Anger'] > 0.4):
        adjusted = dict(scores)
        old_fear = scores['Fear']
        old_anger = scores['Anger']
        # Shift half of the Fear/Anger mass to Sadness, damp the originals,
        # then renormalize so the scores sum to 1 again.
        adjusted['Sadness'] += (old_fear * 0.5) + (old_anger * 0.5)
        adjusted['Fear'] *= 0.1
        adjusted['Anger'] *= 0.1

        total = sum(adjusted.values())
        if total > 0:
            adjusted = {label: value / total for label, value in adjusted.items()}
        return adjusted

    return scores


def predict_emotion(user_text):
    """Classify the emotion of a code-mixed sentence.

    The text is standardized, scored by both classifiers, combined with
    fixed ensemble weights, and finally adjusted by keyword rules for
    negation and joy words.

    Args:
        user_text: Raw text from the UI.

    Returns:
        A ``(clean_text, scores)`` tuple: the standardized text that was fed
        to the models and a dict mapping each display label to its score.
        For empty, whitespace-only or non-string input, or when nothing
        survives standardization, returns ``("", <all-zero scores>)``.
    """
    if not isinstance(user_text, str) or not user_text.strip():
        return "", _zero_scores()

    clean_text = standardize_text_v2(user_text)
    if not clean_text:
        return "", _zero_scores()

    # Class index i of either model corresponds to universal_labels[i].
    display_names = [label.capitalize() for label in universal_labels]
    bb_probs = _model_probabilities(bb_tokenizer, bb_model, clean_text)
    rob_probs = _model_probabilities(rob_tokenizer, rob_model, clean_text)
    bb_dict = {name: float(p) for name, p in zip(display_names, bb_probs)}
    rob_dict = {name: float(p) for name, p in zip(display_names, rob_probs)}

    result_dict = {
        label: (bb_dict.get(label, 0.0) * _BANGLABERT_WEIGHT)
        + (rob_dict.get(label, 0.0) * _XLMR_WEIGHT)
        for label in _DISPLAY_LABELS
    }

    return clean_text, _apply_context_rules(result_dict, user_text, clean_text)
|
|
| |
# Two-column layout: input and submit on the left, the standardized text
# and the emotion scores on the right.
with gr.Blocks(title="🧠🎭 Code-Mixed Emotion Classifier") as demo:
    gr.Markdown("# 🧠🎭 Code-Mixed Emotion Classifier (Context-Aware)")
    gr.Markdown("এটি **BanglaBERT** (87%) এবং **XLM-RoBERTa** (13%)-এর সমন্বয়ে তৈরি একটি গাণিতিকভাবে অপ্টিমাইজড এনসেম্বল সিস্টেম।")

    with gr.Row():
        with gr.Column():
            input_box = gr.Textbox(
                lines=4,
                placeholder="Type your code-mixed sentence here...",
                label="Input Code-Mixed Text",
            )
            submit_btn = gr.Button("Submit", variant="primary")

        with gr.Column():
            output_text = gr.Textbox(
                label="🔍 Text seen by Model (Fuzzy + Transliteration)",
            )
            output_label = gr.Label(
                num_top_classes=6,
                label="Predicted Emotion (With Intelligent Context Mapping)",
            )

    # predict_emotion returns (clean_text, scores), matching the two outputs.
    submit_btn.click(
        fn=predict_emotion,
        inputs=input_box,
        outputs=[output_text, output_label],
    )


# Start the web server; show_error surfaces handler exceptions in the UI.
demo.launch(show_error=True, max_threads=10)