Akash751's picture
Upload app.py with huggingface_hub
666b121 verified
Raw
History Blame Contribute Delete
8.41 kB
import gradio as gr
import pandas as pd
import numpy as np
import torch
import re
import difflib
import requests
import os
import asyncio
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from scipy.special import softmax
# Reset asyncio to its default event-loop policy before anything else runs.
# NOTE(review): presumably a workaround for a host-installed policy — confirm
# it is still needed.
try:
    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
except Exception as exc:
    # Best effort only: the app works without the reset, so report and go on.
    # (`Exception`, not a bare `except`, so Ctrl-C / SystemExit still propagate.)
    print(f"Could not reset asyncio event loop policy: {exc}")

# All inference runs on CPU.
device = torch.device("cpu")

# Absolute fixed label space: index i of a model's output is read as
# universal_labels[i], regardless of what the checkpoint's own config says.
universal_labels = ['fear', 'anger', 'sadness', 'disgust', 'joy', 'surprise']
fixed_label2id = {lbl: idx for idx, lbl in enumerate(universal_labels)}
fixed_id2label = {idx: lbl for idx, lbl in enumerate(universal_labels)}

# Dictionary lookup: prefer a copy next to the app, then the Drive location,
# otherwise fall back to the small built-in word list below.
dictionary_file = 'Dictionary_BN_EN_61208.xlsx'
shortcut_path = '/content/drive/MyDrive/final/' + dictionary_file
dict_path = next(
    (candidate for candidate in (dictionary_file, shortcut_path) if os.path.exists(candidate)),
    None,
)

if dict_path:
    df_dict = pd.read_excel(dict_path)
    # Only the first two columns are used: column 0 is the lookup key
    # (lower-cased), column 1 the replacement text. Rows with a blank cell in
    # either column are dropped; otherwise astype(str) would turn the blank
    # into the literal string "nan" and inject it into the mapping.
    word_pairs = df_dict.iloc[:, :2].dropna()
    en_words = word_pairs.iloc[:, 0].astype(str).str.strip().str.lower()
    bn_words = word_pairs.iloc[:, 1].astype(str).str.strip()
    mapping_dict = dict(zip(en_words, bn_words))
else:
    # Minimal built-in fallback: romanized spellings -> Bengali script.
    mapping_dict = {
        "ami": "আমি", "amar": "আমার", "amake": "আমাকে", "tumi": "তুমি", "tomar": "তোমার",
        "apni": "আপনি", "she": "সে", "na": "না", "ni": "নি", "nai": "নাই",
        "valo": "ভালো", "bhalo": "ভালো", "khub": "খুব", "ajke": "আজকে",
        "kosto": "কষ্ট", "baje": "বাজে", "ranna": "রান্না", "kharap": "খারাপ",
    }

# Key list handed to difflib for fuzzy matching of misspelled words.
dict_keys = list(mapping_dict)
def transliterate_word(word):
    """Transliterate a romanized word to Bengali via Google Input Tools.

    This is a best-effort helper: on any failure (network error, timeout,
    non-2xx status, unexpected response shape, non-SUCCESS reply) the input
    is returned unchanged instead of raising.

    Args:
        word: The word to transliterate.

    Returns:
        The first suggestion returned by the service, or ``word`` itself
        when no suggestion could be obtained.
    """
    if not word:
        # Nothing to transliterate; skip the network round-trip.
        return word

    # Passed via `params` so the word is URL-encoded instead of being spliced
    # into the query string verbatim.
    query = {
        "text": word,
        "itc": "bn-t-i0-und",
        "num": 1,
        "cp": 0,
        "cs": 1,
        "ie": "utf-8",
        "oe": "utf-8",
        "app": "demopage",
    }
    try:
        response = requests.get("https://inputtools.google.com/request", params=query, timeout=2)
        response.raise_for_status()
        result = response.json()
        if result[0] == 'SUCCESS':
            # The code reads the first candidate of the first result entry.
            return result[1][0][1][0]
    except Exception:
        # Deliberate fallback: transliteration is optional, the caller keeps
        # the original spelling. (Ctrl-C / SystemExit are not swallowed.)
        pass
    return word
def standardize_text_v2(text):
    """Normalize code-mixed text word by word before classification.

    The text is lower-cased and every character other than a-z, 0-9,
    the Bengali Unicode block (U+0980-U+09FF) and whitespace is removed.
    Each remaining word is then resolved in this order:

    1. exact hit in ``mapping_dict``;
    2. closest key in ``dict_keys`` (difflib, cutoff 0.8);
    3. ``transliterate_word`` for purely a-z words;
    4. otherwise the word is kept as is.

    Args:
        text: Raw user input.

    Returns:
        The standardized words joined by single spaces, or "" when ``text``
        is not a string.
    """
    if not isinstance(text, str):
        return ""

    stripped = re.sub(r'[^a-z0-9\u0980-\u09FF\s]', '', text.lower())

    normalized = []
    for token in stripped.split():
        if token in mapping_dict:
            normalized.append(mapping_dict[token])
            continue

        nearest = difflib.get_close_matches(token, dict_keys, n=1, cutoff=0.8)
        if nearest:
            normalized.append(mapping_dict[nearest[0]])
            continue

        # Only plain Latin-letter words are sent to the transliterator;
        # Bengali-script words and anything with digits pass through.
        is_latin_word = re.match(r'^[a-z]+$', token) is not None
        normalized.append(transliterate_word(token) if is_latin_word else token)

    return " ".join(normalized)
print("Loading models with configuration overrides...")


def _load_classifier(checkpoint, tokenizer_name):
    """Load one fine-tuned classifier with the fixed six-label mapping.

    The checkpoint's config is fetched first and its label metadata is
    overwritten with the module-wide fixed label space before the weights
    are loaded, so both models report labels in the same order.

    Args:
        checkpoint: Hub id of the fine-tuned sequence-classification model.
        tokenizer_name: Hub id the tokenizer is loaded from.

    Returns:
        A ``(config, tokenizer, model)`` tuple; the model is moved to ``device``.
    """
    config = AutoConfig.from_pretrained(checkpoint)
    config.num_labels = 6
    config.label2id = fixed_label2id
    config.id2label = fixed_id2label
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, config=config).to(device)
    return config, tokenizer, model


# BanglaBERT classifier (tokenizer comes from the csebuetnlp/banglabert repo).
bb_config, bb_tokenizer, bb_model = _load_classifier(
    "Akash751/banglabert-code-mixed-emotion",
    "csebuetnlp/banglabert",
)

# XLM-RoBERTa classifier (tokenizer comes from the xlm-roberta-base repo).
rob_config, rob_tokenizer, rob_model = _load_classifier(
    "Akash751/roberta-code-mixed-emotion",
    "xlm-roberta-base",
)
def _zero_scores():
    """Return the all-zero score table used when there is nothing to classify."""
    return {"Fear": 0, "Anger": 0, "Sadness": 0, "Disgust": 0, "Joy": 0, "Surprise": 0}


def _class_probabilities(tokenizer, model, text):
    """Run one classifier on ``text`` and return the softmax of its logits (first row)."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    return softmax(logits.cpu().numpy(), axis=1)[0]


def _detect_intent_cues(user_text, clean_text):
    """Return ``(has_negation, has_joy_base)`` for the raw and standardized text.

    Both the user's original spelling and the standardized text are searched,
    so a cue is found whether it was typed in Latin or in Bengali script.
    """
    negation_patterns = ['na', 'ni', 'nai', 'না', 'নি', 'নয়', 'নাই', 'হয়নি']
    # 'आनंद' (Devanagari) is kept for backward compatibility; the Bengali
    # spelling 'আনন্দ' is what the standardized text can actually contain.
    joy_patterns = ['valo', 'bhalo', 'ভালো', 'ভালোই', 'ভাল', 'khushi', 'খুশি', 'sundor', 'সুন্দর', 'anondo', 'आनंद', 'আনন্দ']

    combined_raw_text = " " + user_text.lower() + " " + clean_text + " "

    # Negation cues are very short, so they must match a whole word: as plain
    # substrings 'na'/'ni'/'না'/'নি' also fire inside unrelated words such as
    # "ranna", "apni", 'রান্না' or 'আপনি'. Negations written attached to a verb
    # have to be listed explicitly (as 'হয়নি' is).
    words = set(re.findall(r'[a-z0-9\u0980-\u09FF]+', combined_raw_text))
    has_negation = not words.isdisjoint(negation_patterns)

    # Joy cues are longer stems and stay substring matches, so inflected
    # forms built on them are still recognized.
    has_joy_base = any(stem in combined_raw_text for stem in joy_patterns)
    return has_negation, has_joy_base


def _apply_intent_rules(scores, has_negation, has_joy_base):
    """Adjust the fused ``scores`` in place according to the cue flags and return them."""
    if has_joy_base:
        # LAYER A: joy word together with a negation (e.g. "ভালো লাগছে না") -> Sadness.
        # LAYER B: joy word with no negation (e.g. "আজকে ভালো লাগছে") -> Joy.
        # Either way the prediction is locked to a single label.
        locked_label = 'Sadness' if has_negation else 'Joy'
        for label in scores:
            scores[label] = 0.0
        scores[locked_label] = 1.0
    elif has_negation:
        # LAYER C: generic negation handling for everything else. When Fear or
        # Anger is strong, half of each is moved onto Sadness and both are
        # cut to a tenth.
        if scores['Fear'] > 0.4 or scores['Anger'] > 0.4:
            old_fear = scores['Fear']
            old_anger = scores['Anger']
            scores['Sadness'] += (old_fear * 0.5) + (old_anger * 0.5)
            scores['Fear'] *= 0.1
            scores['Anger'] *= 0.1
    return scores


def predict_emotion(user_text):
    """Classify the emotion of a code-mixed sentence.

    Pipeline: standardize the text, run both classifiers, fuse their
    probabilities with fixed weights (0.87 BanglaBERT, 0.13 XLM-RoBERTa),
    apply the rule layer for joy/negation cues, then renormalize.

    Args:
        user_text: Raw text from the input box.

    Returns:
        ``(clean_text, scores)`` where ``clean_text`` is the standardized
        text fed to the models and ``scores`` maps each of the six
        capitalized labels to its final score. For empty input, or input
        that standardizes to nothing, returns ``("", all-zero scores)``.
    """
    if not user_text or not user_text.strip():
        return "", _zero_scores()

    clean_text = standardize_text_v2(user_text)
    if not clean_text:
        return "", _zero_scores()

    # 1. Base model inference pass.
    bb_probs = _class_probabilities(bb_tokenizer, bb_model, clean_text)
    rob_probs = _class_probabilities(rob_tokenizer, rob_model, clean_text)

    # 2. Ensemble fusion (0.87 vs 0.13); output index i is universal_labels[i].
    final_w_bb = 0.87
    final_w_rob = 0.13
    result_dict = {
        label.capitalize(): (float(bb_p) * final_w_bb) + (float(rob_p) * final_w_rob)
        for label, bb_p, rob_p in zip(universal_labels, bb_probs, rob_probs)
    }

    # 3. Rule layer for joy / negation cues.
    has_negation, has_joy_base = _detect_intent_cues(user_text, clean_text)
    _apply_intent_rules(result_dict, has_negation, has_joy_base)

    # Renormalize so the scores sum to 1 after any rule adjustment.
    total = sum(result_dict.values())
    if total > 0:
        for label in result_dict:
            result_dict[label] = result_dict[label] / total

    return clean_text, result_dict
# 4. Hand-built Blocks interface (intentionally without an Examples section).
with gr.Blocks(title="🧠🎭 Code-Mixed Emotion Classifier") as demo:
    gr.Markdown("# 🧠🎭 Code-Mixed Emotion Classifier (Context-Aware)")
    # Bengali subtitle; in English: "This is a mathematically optimized
    # ensemble system built by combining BanglaBERT (87%) and XLM-RoBERTa (13%)."
    gr.Markdown("এটি **BanglaBERT** (87%) এবং **XLM-RoBERTa** (13%)-এর সমন্বয়ে তৈরি একটি গাণিতিকভাবে অপ্টিমাইজড এনসেম্বল সিস্টেম।")

    with gr.Row():
        # Left column: user input and the trigger button.
        with gr.Column():
            input_box = gr.Textbox(
                lines=4,
                placeholder="Type your code-mixed sentence here...",
                label="Input Code-Mixed Text",
            )
            submit_btn = gr.Button("Submit", variant="primary")

        # Right column: the standardized text and the per-label scores.
        with gr.Column():
            output_text = gr.Textbox(label="🔍 Text seen by Model (Fuzzy + Transliteration)")
            output_label = gr.Label(
                num_top_classes=6,
                label="Predicted Emotion (With Intelligent Context Mapping)",
            )

    # predict_emotion returns (clean_text, scores), matching the two outputs.
    submit_btn.click(
        fn=predict_emotion,
        inputs=input_box,
        outputs=[output_text, output_label],
    )

demo.launch(show_error=True, max_threads=10)