Spaces:

enigmaize
/

nlp_app

Sleeping

File size: 7,129 Bytes

6ac050a
 
 
 
 
 
 
c7cadf9
6ac050a
c7cadf9
751db91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ac050a
751db91
c7cadf9
6ac050a
 
 
 
751db91
6ac050a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7cadf9
6ac050a
 
751db91
c7cadf9
751db91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ac050a
751db91
c7cadf9
6ac050a
 
 
 
751db91
 
6ac050a
751db91
 
 
 
 
 
c7cadf9
 
 
 
751db91
 
 
c7cadf9
751db91
c7cadf9
751db91
c7cadf9
6ac050a
751db91
 
6ac050a
 
751db91
6ac050a
 
 
 
 
 
 
 
 
 
 
751db91
c7cadf9
6ac050a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7cadf9
751db91
c7cadf9
751db91
6ac050a

import gradio as gr
import pandas as pd
import numpy as np
import json
import pickle
import re

# Load saved components
# Cache of the loaded (model, tokenizer, label_encoder, config) tuple so the
# artifacts are read from disk only once per process instead of on every call.
_MODEL_COMPONENTS = None


def load_model_components():
    """Load the saved model artifacts, reusing them after the first call.

    Reads ``best_emotion_model.h5``, ``tokenizer.pickle``,
    ``label_encoder.pickle`` and ``model_config.json`` relative to the
    current working directory.

    Returns:
        tuple: ``(model, tokenizer, label_encoder, config)``. The same tuple
        object is returned on every call after the first successful load.

    Raises:
        ImportError: If TensorFlow cannot be imported or any artifact fails
            to load. The original exception is attached as ``__cause__``.
    """
    global _MODEL_COMPONENTS
    if _MODEL_COMPONENTS is not None:
        return _MODEL_COMPONENTS

    try:
        # Imported lazily so importing this module does not require
        # TensorFlow until a model is actually needed.
        from tensorflow.keras.models import load_model

        model = load_model('best_emotion_model.h5')

        # NOTE: pickle.load can execute arbitrary code while deserializing;
        # these files must only ever come from a trusted source.
        with open('tokenizer.pickle', 'rb') as handle:
            tokenizer = pickle.load(handle)

        with open('label_encoder.pickle', 'rb') as handle:
            label_encoder = pickle.load(handle)

        with open('model_config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)

    except Exception as e:
        # Keep the historical exception type, but chain the real cause so the
        # traceback shows what actually went wrong.
        raise ImportError(f"Error loading model components: {str(e)}") from e

    # Only cache after everything loaded successfully, so a failed attempt
    # is retried on the next call.
    _MODEL_COMPONENTS = (model, tokenizer, label_encoder, config)
    return _MODEL_COMPONENTS

# Text cleaning function
def clean_text(text, labels_to_remove=None):
    """Clean and normalize text.

    The text is lower-cased, URLs are removed, every character other than
    ASCII letters, whitespace and ``.,!?;:`` is replaced by a space, the
    given labels are removed as whole words, and runs of whitespace are
    collapsed to single spaces.

    Args:
        text: Input to clean. Anything that is not a ``str`` (``None``, NaN,
            numbers, lists, ...) yields an empty string.
        labels_to_remove: Optional iterable of label strings to strip from
            the text as whole words (case-insensitive). ``None`` or an empty
            iterable removes nothing.

    Returns:
        str: The cleaned text, or ``""`` when there is nothing to clean.
    """
    # A plain isinstance check covers None and NaN as well, and unlike
    # pd.isna() it cannot raise on list/array input.
    if not isinstance(text, str):
        return ""

    text = text.lower()

    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)

    # Remove special characters but keep basic punctuation
    text = re.sub(r'[^a-zA-Z\s.,!?;:]', ' ', text)

    # Remove the emotion labels themselves to prevent leakage.
    # Labels are applied one after another, in the order given.
    for label in labels_to_remove or ():
        pattern = r'\b' + re.escape(label.lower()) + r'\b'
        text = re.sub(pattern, ' ', text, flags=re.IGNORECASE)

    # Remove extra whitespace
    return re.sub(r'\s+', ' ', text).strip()

# Prediction function
def predict_emotion(text, top_k=5):
    """Predict emotions for a text and return the top-k with confidences.

    Args:
        text: Raw input text. It is passed through ``clean_text`` with the
            model's own class labels removed before tokenization.
        top_k: Number of highest-scoring emotions to return. Values below 1
            are treated as 1; values above the number of classes return all
            classes.

    Returns:
        list[tuple[str, float]]: ``(emotion, confidence_percent)`` pairs,
        highest confidence first. Returns ``[("No valid text", 0.0)]`` when
        nothing is left after cleaning and ``[("Error", 0.0)]`` when loading
        or prediction fails (the failure is logged).
    """
    try:
        # pad_sequences must be imported in this scope: an import made inside
        # load_model_components() is local to that function and not visible
        # here.
        from tensorflow.keras.preprocessing.sequence import pad_sequences

        model, tokenizer, label_encoder, config = load_model_components()
        max_len = config['MAX_LEN']

        # Clean text
        emotion_labels = list(label_encoder.classes_)
        cleaned = clean_text(text, labels_to_remove=emotion_labels)

        if not cleaned:
            return [("No valid text", 0.0)]

        # Tokenize and pad
        sequence = tokenizer.texts_to_sequences([cleaned])
        padded = pad_sequences(sequence, maxlen=max_len, padding='post', truncating='post')

        # Predict; [0] takes the scores for the single input row.
        prediction = model.predict(padded, verbose=0)[0]

        # Clamp top_k: with top_k == 0 the slice [-0:] would select every
        # class instead of none.
        top_k = max(1, min(int(top_k), len(prediction)))
        top_indices = np.argsort(prediction)[-top_k:][::-1]

        return [
            (emotion_labels[idx], float(prediction[idx]) * 100)
            for idx in top_indices
        ]

    except Exception:
        # Keep the sentinel return value callers expect, but record the
        # failure instead of discarding it silently.
        import logging

        logging.getLogger(__name__).exception("Emotion prediction failed")
        return [("Error", 0.0)]

# Gradio interface
def emotion_classifier(text, top_k):
    """Gradio callback: render the emotion predictions for ``text`` as Markdown.

    Args:
        text: Text entered by the user.
        top_k: Number of emotions to show; converted with ``int()``.

    Returns:
        str: A Markdown table with one row per prediction, or a message
        starting with "❌" when the input is blank, no predictions come
        back, or an exception is raised while predicting/formatting.
    """
    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return "❌ Please enter some text to analyze emotions."

    try:
        ranked = predict_emotion(text, int(top_k))
        if not ranked:
            return "❌ No predictions generated. Please try different text."

        # Header: echo of the input followed by the Markdown table head.
        pieces = [
            f"**Emotion Predictions for:** {text}\n\n",
            "| Emotion | Confidence (%) |\n",
            "|---------|----------------|\n",
        ]

        # One table row per prediction; non-positive scores are shown as
        # "Not available" rather than as a number.
        for label, score in ranked:
            shown = f"{score:.2f}" if score > 0 else "Not available"
            pieces.append(f"| {label} | {shown} |\n")

        return "".join(pieces)

    except Exception as err:
        return f"❌ Error during analysis: {str(err)}"

# Create Gradio interface
# Builds the module-level `demo` Blocks app: a header, one row with an input
# column and an output column, the button wiring, and a collapsed info panel.
# Components are declared inside nested context managers, so the nesting and
# order below define the page structure.
with gr.Blocks(title="Emotion Classification App", theme=gr.themes.Soft()) as demo:
    # Page header / short description.
    gr.Markdown("""
    # 🧠 Emotion Classification from Text
    This application uses a bidirectional LSTM model to classify emotions from text input. 
    The model was trained on 287,000 AI-generated question-answer pairs covering 75 different emotions.
    """)
    
    with gr.Row():
        # First column: everything the user interacts with.
        with gr.Column():
            # Free-text input, pre-filled with the first example sentence.
            input_text = gr.Textbox(
                label="Enter Text for Emotion Analysis",
                placeholder="Type your text here (e.g., 'I feel so happy about my achievements!')",
                lines=5,
                value="I made the mistake, but I'm determined to fix it immediately and ensure it never happens again"
            )
            
            # Integer slider (3..10, default 5); its value is passed to
            # emotion_classifier as `top_k`.
            top_k_slider = gr.Slider(
                minimum=3,
                maximum=10,
                value=5,
                step=1,
                label="Number of Emotions to Show"
            )
            
            submit_btn = gr.Button("🔍 Analyze Emotions", variant="primary")
            
            # Example texts
            # Each example is a one-element list because `inputs` has a single
            # component (input_text). The `examples` handle is not referenced
            # again in the code visible here.
            gr.Markdown("### Example Texts:")
            examples = gr.Examples(
                examples=[
                    ["I made the mistake, but I'm determined to fix it immediately and ensure it never happens again"],
                    ["I heard that rumor about my colleague, and honestly, I feel a rush of competitive schadenfreude."],
                    ["The beauty of the mountain view left me speechless; I felt incredibly small and insignificant."],
                    ["I'm just exhausted and drained. I don't feel anything anymore, not even stress."],
                    ["Seeing my childhood home again brought back a wave of deep melancholy and sweet sadness."]
                ],
                inputs=[input_text],
                label="Try these examples"
            )
        
        # Second column: Markdown area that receives the string returned by
        # emotion_classifier.
        with gr.Column():
            output = gr.Markdown(
                label="Emotion Predictions", 
                value="Enter text and click 'Analyze Emotions' to see predictions."
            )
    
    # Clicking the button calls emotion_classifier(input_text, top_k_slider)
    # and writes the returned string into `output`.
    submit_btn.click(
        fn=emotion_classifier,
        inputs=[input_text, top_k_slider],
        outputs=output
    )
    
    # Model info section
    # Static descriptive text only; the figures below are hard-coded strings
    # and are not computed from the loaded model.
    with gr.Accordion("Model Information", open=False):
        gr.Markdown("""
        ### Model Architecture
        - **Embedding Layer**: Pre-trained Word2Vec embeddings (128 dimensions)
        - **Bidirectional LSTM**: Two layers (128 and 64 units) for sequence processing
        - **Dense Layers**: 256 and 128 units with dropout for regularization
        - **Output Layer**: 75 neurons (one per emotion) with softmax activation
        
        ### Training Details
        - **Dataset**: 287,280 AI-generated question-answer pairs
        - **Emotions**: 75 different emotion categories
        - **Validation Accuracy**: 87.62%
        - **Test Accuracy**: 87.84%
        
        ### Features
        - Real-time emotion classification
        - Confidence scoring for predictions
        - Support for complex emotional contexts
        - Robust text preprocessing pipeline
        """)

# Launch the app
# Only start the Gradio server when this file is run as a script; importing
# the module just builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()