import gradio as gr
import pandas as pd
import numpy as np
import json
import pickle
import re
# Load saved components
def load_model_components():
    """Load and cache the trained model, tokenizer, label encoder, and config.

    Returns:
        tuple: ``(model, tokenizer, label_encoder, config)`` where ``config``
        is the dict parsed from ``model_config.json`` (must contain
        ``MAX_LEN``).

    Raises:
        ImportError: if any artifact fails to load (original exception type
        preserved so existing callers' handling keeps working).
    """
    # Cache on the function object: this is called once per prediction, and
    # reloading the Keras model + unpickling artifacts from disk every time
    # is needlessly expensive.
    cached = getattr(load_model_components, "_components", None)
    if cached is not None:
        return cached
    try:
        from tensorflow.keras.models import load_model
        model = load_model('best_emotion_model.h5')
        # NOTE(review): unpickling executes arbitrary code — these artifact
        # files must come from a trusted source.
        with open('tokenizer.pickle', 'rb') as handle:
            tokenizer = pickle.load(handle)
        with open('label_encoder.pickle', 'rb') as handle:
            label_encoder = pickle.load(handle)
        with open('model_config.json', 'r') as f:
            config = json.load(f)
    except Exception as e:
        raise ImportError(f"Error loading model components: {str(e)}")
    load_model_components._components = (model, tokenizer, label_encoder, config)
    return load_model_components._components
# Text cleaning function
def clean_text(text, labels_to_remove=None):
    """Lowercase and normalize text for the emotion model.

    Removes URLs, characters outside letters/whitespace/basic punctuation,
    the given emotion-label words (to prevent target leakage), and
    redundant whitespace.

    Args:
        text: Input value; NaN or non-string input yields "".
        labels_to_remove: Optional iterable of label words to delete as
            whole words, case-insensitively. Defaults to None (no removal).
            BUG FIX: was a mutable default ``[]``.

    Returns:
        str: The cleaned, single-spaced, stripped text ("" if unusable).
    """
    # Reject NaN / non-string input early.
    if pd.isna(text) or not isinstance(text, str):
        return ""
    text = text.lower()
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Remove special characters but keep basic punctuation
    text = re.sub(r'[^a-zA-Z\s.,!?;:]', ' ', text)
    # Remove the emotion labels themselves to prevent leakage
    if labels_to_remove:
        for label in labels_to_remove:
            # \b word boundaries so e.g. "happy" is not cut out of "unhappy".
            pattern = r'\b' + re.escape(label.lower()) + r'\b'
            text = re.sub(pattern, ' ', text, flags=re.IGNORECASE)
    # Collapse runs of whitespace and trim the ends.
    return re.sub(r'\s+', ' ', text).strip()
# Prediction function
def predict_emotion(text, top_k=5):
    """Predict the top-k emotions for *text*.

    Args:
        text: Raw user text; it is cleaned before tokenization.
        top_k: Number of highest-confidence emotions to return.

    Returns:
        list[tuple[str, float]]: (emotion, confidence-percentage) pairs in
        descending confidence order. Returns ``[("No valid text", 0.0)]``
        when cleaning leaves nothing, and ``[("Error", 0.0)]`` on failure
        (the UI layer expects this shape).
    """
    try:
        # BUG FIX: pad_sequences was previously imported only inside
        # load_model_components(), so it was never in scope here and every
        # prediction failed with NameError, silently swallowed by the broad
        # except below.
        from tensorflow.keras.preprocessing.sequence import pad_sequences
        model, tokenizer, label_encoder, config = load_model_components()
        MAX_LEN = config['MAX_LEN']
        # Strip the emotion-label words themselves to prevent label leakage.
        EMOTION_LABELS = list(label_encoder.classes_)
        cleaned = clean_text(text, labels_to_remove=EMOTION_LABELS)
        if not cleaned:
            return [("No valid text", 0.0)]
        # Tokenize and pad to the length the model was trained with.
        sequence = tokenizer.texts_to_sequences([cleaned])
        padded = pad_sequences(sequence, maxlen=MAX_LEN, padding='post', truncating='post')
        prediction = model.predict(padded, verbose=0)[0]
        # argsort is ascending: take the last top_k, reversed -> descending.
        top_indices = np.argsort(prediction)[-top_k:][::-1]
        return [(label_encoder.classes_[idx], prediction[idx] * 100) for idx in top_indices]
    except Exception:
        # Best-effort fallback: callers render whatever pairs come back.
        return [("Error", 0.0)]
# Gradio interface
def emotion_classifier(text, top_k):
    """Gradio callback: turn (text, top_k) into a markdown results table."""
    # Guard: nothing to analyze.
    if not text or not text.strip():
        return "❌ Please enter some text to analyze emotions."
    try:
        predictions = predict_emotion(text, int(top_k))
        if not predictions:
            return "❌ No predictions generated. Please try different text."
        # Assemble the markdown table line by line, then join once.
        lines = [
            f"**Emotion Predictions for:** {text}\n",
            "| Emotion | Confidence (%) |",
            "|---------|----------------|",
        ]
        for emotion, confidence in predictions:
            cell = f"{confidence:.2f}" if confidence > 0 else "Not available"
            lines.append(f"| {emotion} | {cell} |")
        return "\n".join(lines) + "\n"
    except Exception as e:
        return f"❌ Error during analysis: {str(e)}"
# Create Gradio interface
# Top-level UI definition: layout, example inputs, and event wiring.
with gr.Blocks(title="Emotion Classification App", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("""
    # 🧠 Emotion Classification from Text
    This application uses a bidirectional LSTM model to classify emotions from text input.
    The model was trained on 287,000 AI-generated question-answer pairs covering 75 different emotions.
    """)
    with gr.Row():
        # Left column: text input, top-k control, submit button, examples.
        with gr.Column():
            input_text = gr.Textbox(
                label="Enter Text for Emotion Analysis",
                placeholder="Type your text here (e.g., 'I feel so happy about my achievements!')",
                lines=5,
                value="I made the mistake, but I'm determined to fix it immediately and ensure it never happens again"
            )
            # How many of the top-scoring emotions to display (3–10).
            top_k_slider = gr.Slider(
                minimum=3,
                maximum=10,
                value=5,
                step=1,
                label="Number of Emotions to Show"
            )
            submit_btn = gr.Button("🔍 Analyze Emotions", variant="primary")
            # Example texts
            gr.Markdown("### Example Texts:")
            # Clicking an example populates the textbox above.
            examples = gr.Examples(
                examples=[
                    ["I made the mistake, but I'm determined to fix it immediately and ensure it never happens again"],
                    ["I heard that rumor about my colleague, and honestly, I feel a rush of competitive schadenfreude."],
                    ["The beauty of the mountain view left me speechless; I felt incredibly small and insignificant."],
                    ["I'm just exhausted and drained. I don't feel anything anymore, not even stress."],
                    ["Seeing my childhood home again brought back a wave of deep melancholy and sweet sadness."]
                ],
                inputs=[input_text],
                label="Try these examples"
            )
        # Right column: markdown output produced by emotion_classifier().
        with gr.Column():
            output = gr.Markdown(
                label="Emotion Predictions",
                value="Enter text and click 'Analyze Emotions' to see predictions."
            )
    # Wire the button: (text, top_k) -> markdown table string.
    submit_btn.click(
        fn=emotion_classifier,
        inputs=[input_text, top_k_slider],
        outputs=output
    )
    # Model info section
    with gr.Accordion("Model Information", open=False):
        gr.Markdown("""
    ### Model Architecture
    - **Embedding Layer**: Pre-trained Word2Vec embeddings (128 dimensions)
    - **Bidirectional LSTM**: Two layers (128 and 64 units) for sequence processing
    - **Dense Layers**: 256 and 128 units with dropout for regularization
    - **Output Layer**: 75 neurons (one per emotion) with softmax activation
    ### Training Details
    - **Dataset**: 287,280 AI-generated question-answer pairs
    - **Emotions**: 75 different emotion categories
    - **Validation Accuracy**: 87.62%
    - **Test Accuracy**: 87.84%
    ### Features
    - Real-time emotion classification
    - Confidence scoring for predictions
    - Support for complex emotional contexts
    - Robust text preprocessing pipeline
    """)
# Launch the app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()