# app.py — Gradio demo for emotion classification with a bidirectional LSTM.
import gradio as gr
import pandas as pd
import numpy as np
import json
import pickle
import re
# Create a simple fallback prediction function
def predict_emotion_fallback(text, top_k=5):
    """Fallback prediction function for testing.

    Returns canned (emotion, confidence%) pairs so the UI stays usable
    when the real model cannot be loaded. The ``text`` argument is
    accepted for interface parity with the real predictor but ignored.
    """
    demo_scores = (
        ("Wonder", 42.88),
        ("Relief", 6.86),
        ("Intrigue", 6.62),
        ("Joy", 5.31),
        ("Curiosity", 4.97),
    )
    # Trim the static list down to the requested number of results.
    return list(demo_scores[:top_k])
# Load saved components with comprehensive error handling
def load_model_components():
    """Load all saved model components with error handling.

    Successful loads are cached on the function object, so repeated calls
    (this function previously re-read the model from disk on *every*
    prediction) return instantly after the first success. Failures are not
    cached, so a later call can retry.

    Returns:
        tuple: (model, tokenizer, label_encoder, config) on success,
        or (None, None, None, None) if any component fails to load.
    """
    # Fast path: reuse components loaded by an earlier successful call.
    cached = getattr(load_model_components, "_cache", None)
    if cached is not None:
        return cached
    try:
        # TensorFlow is imported lazily so the app can still start (and fall
        # back to canned predictions) when it is not installed.
        from tensorflow.keras.models import load_model

        # Trained Keras model.
        model = load_model('best_emotion_model.h5')
        # Fitted tokenizer (training-time vocabulary).
        # NOTE: pickle.load is only acceptable because these artifact files
        # ship with the app; never unpickle untrusted data.
        with open('tokenizer.pickle', 'rb') as handle:
            tokenizer = pickle.load(handle)
        # Label encoder mapping class indices to emotion names.
        with open('label_encoder.pickle', 'rb') as handle:
            label_encoder = pickle.load(handle)
        # Training-time configuration (e.g. MAX_LEN used for padding).
        with open('model_config.json', 'r') as f:
            config = json.load(f)
        load_model_components._cache = (model, tokenizer, label_encoder, config)
        return load_model_components._cache
    except Exception as e:
        print(f"Model loading error: {str(e)}")
        return None, None, None, None
# Text cleaning function
def clean_text(text, labels_to_remove=None):
    """Clean and normalize text.

    Args:
        text: Input text. NaN or non-string inputs yield "".
        labels_to_remove: Optional list of emotion-label words to strip
            from the text (prevents label leakage into the model input).
            Defaults to None — fixed from a mutable-default-argument
            (``[]``) in the original signature; behavior is unchanged.

    Returns:
        str: Lower-cased text with URLs removed, special characters
        replaced by spaces, the given labels stripped, and whitespace
        collapsed.
    """
    if pd.isna(text) or not isinstance(text, str):
        return ""
    text = text.lower()
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Remove special characters but keep basic punctuation
    text = re.sub(r'[^a-zA-Z\s.,!?;:]', ' ', text)
    # Remove the emotion labels themselves to prevent leakage
    for label in (labels_to_remove or []):
        pattern = r'\b' + re.escape(label.lower()) + r'\b'
        text = re.sub(pattern, ' ', text, flags=re.IGNORECASE)
    # Collapse runs of whitespace into single spaces
    return re.sub(r'\s+', ' ', text).strip()
# Prediction function with fallback
def predict_emotion(text, top_k=5):
    """Predict emotion from text with top-k confidence scores.

    Args:
        text: Raw input text.
        top_k: Number of highest-confidence emotions to return.

    Returns:
        list[tuple[str, float]]: (emotion, confidence-percentage) pairs,
        highest confidence first. Falls back to canned demo predictions
        when the model components are unavailable or prediction fails.
    """
    model, tokenizer, label_encoder, config = load_model_components()
    # Use the fallback if any required component (incl. config) is missing.
    if model is None or tokenizer is None or label_encoder is None or config is None:
        return predict_emotion_fallback(text, top_k)
    try:
        # BUG FIX: pad_sequences was previously imported only inside
        # load_model_components(), so it was never in scope here — every
        # real prediction raised NameError and silently fell back to the
        # canned output. Import it where it is actually used.
        from tensorflow.keras.preprocessing.sequence import pad_sequences

        max_len = config['MAX_LEN']
        # Strip the emotion labels themselves from input to prevent leakage.
        emotion_labels = list(label_encoder.classes_)
        cleaned = clean_text(text, labels_to_remove=emotion_labels)
        if not cleaned:
            return [("No valid text", 0.0)]
        # Tokenize and pad to the training-time sequence length.
        sequence = tokenizer.texts_to_sequences([cleaned])
        padded = pad_sequences(sequence, maxlen=max_len, padding='post', truncating='post')
        prediction = model.predict(padded, verbose=0)[0]
        # Indices of the top-k probabilities, highest first.
        top_indices = np.argsort(prediction)[-top_k:][::-1]
        return [
            (label_encoder.classes_[idx], float(prediction[idx]) * 100)
            for idx in top_indices
        ]
    except Exception as e:
        # Any runtime failure degrades gracefully to the demo output.
        print(f"Prediction error: {str(e)}")
        return predict_emotion_fallback(text, top_k)
# Gradio interface
def emotion_classifier(text, top_k):
    """Main function for Gradio interface.

    Args:
        text: Raw user input to analyze.
        top_k: Number of top emotions to display (coerced to int).

    Returns:
        str: An HTML fragment containing the predictions table, or a
        plain error message starting with "❌".
    """
    # Local import keeps the block self-contained; html.escape is used to
    # sanitize user-controlled text before embedding it in the HTML output.
    import html

    if not text or not text.strip():
        return "❌ Please enter some text to analyze emotions."
    try:
        predictions = predict_emotion(text, int(top_k))
        if not predictions or len(predictions) == 0:
            return "❌ No predictions generated. Please try different text."
        # SECURITY FIX: the raw input (and emotion names) were previously
        # interpolated into HTML unescaped, allowing markup/script injection
        # through the gr.HTML output. Escape everything user-influenced.
        result_html = f"<h3>Emotion Predictions for:</h3><p>{html.escape(text)}</p>"
        result_html += "<table border='1' cellpadding='5' cellspacing='0' style='border-collapse: collapse;'>"
        result_html += "<tr><th>Emotion</th><th>Confidence (%)</th></tr>"
        for emotion, confidence in predictions:
            label = html.escape(str(emotion))
            if confidence > 0:
                result_html += f"<tr><td>{label}</td><td>{confidence:.2f}%</td></tr>"
            else:
                result_html += f"<tr><td>{label}</td><td>Not available</td></tr>"
        result_html += "</table>"
        return result_html
    except Exception as e:
        return f"❌ Error during analysis: {str(e)}"
# Create Gradio interface
# --- Gradio UI -------------------------------------------------------------
# Two-column layout: input controls on the left, HTML prediction table on the
# right, plus a collapsible model-information section at the bottom.
with gr.Blocks(title="Emotion Classification App", theme=gr.themes.Soft()) as demo:
    # Page header / app description.
    gr.Markdown("""
# 🧠 Emotion Classification from Text
This application uses a bidirectional LSTM model to classify emotions from text input.
The model was trained on 287,000 AI-generated question-answer pairs covering 75 different emotions.
""")
    with gr.Row():
        with gr.Column():
            # Free-text input; pre-filled with a demonstration sentence.
            input_text = gr.Textbox(
                label="Enter Text for Emotion Analysis",
                placeholder="Type your text here (e.g., 'I feel so happy about my achievements!')",
                lines=5,
                value="I heard that rumor about my colleague, and honestly, I feel a rush of competitive schadenfreude."
            )
            # How many of the top-scoring emotions to display (top_k).
            top_k_slider = gr.Slider(
                minimum=3,
                maximum=10,
                value=5,
                step=1,
                label="Number of Emotions to Show"
            )
            submit_btn = gr.Button("🔍 Analyze Emotions", variant="primary")
            # Example texts
            gr.Markdown("### Example Texts:")
            # Clickable examples that populate the input textbox.
            examples = gr.Examples(
                examples=[
                    ["I made the mistake, but I'm determined to fix it immediately and ensure it never happens again"],
                    ["I heard that rumor about my colleague, and honestly, I feel a rush of competitive schadenfreude."],
                    ["The beauty of the mountain view left me speechless; I felt incredibly small and insignificant."],
                    ["I'm just exhausted and drained. I don't feel anything anymore, not even stress."],
                    ["Seeing my childhood home again brought back a wave of deep melancholy and sweet sadness."]
                ],
                inputs=[input_text],
                label="Try these examples"
            )
        with gr.Column():
            # Prediction results rendered as an HTML table by emotion_classifier.
            output = gr.HTML(
                label="Emotion Predictions",
                value="<p>Enter text and click 'Analyze Emotions' to see predictions.</p>"
            )
    # Wire the button: (text, slider value) -> emotion_classifier -> HTML output.
    submit_btn.click(
        fn=emotion_classifier,
        inputs=[input_text, top_k_slider],
        outputs=output
    )
    # Model info section
    with gr.Accordion("Model Information", open=False):
        gr.Markdown("""
### Model Architecture
- **Embedding Layer**: Pre-trained Word2Vec embeddings (128 dimensions)
- **Bidirectional LSTM**: Two layers (128 and 64 units) for sequence processing
- **Dense Layers**: 256 and 128 units with dropout for regularization
- **Output Layer**: 75 neurons (one per emotion) with softmax activation
### Training Details
- **Dataset**: 287,280 AI-generated question-answer pairs
- **Emotions**: 75 different emotion categories
- **Validation Accuracy**: 87.62%
- **Test Accuracy**: 87.84%
### Features
- Real-time emotion classification
- Confidence scoring for predictions
- Support for complex emotional contexts
- Robust text preprocessing pipeline
""")
# Launch the app
# Launch the app
if __name__ == "__main__":
    # Start the Gradio server (blocking) only when run as a script.
    demo.launch()