# Source: Hugging Face Spaces — hamxaameer / CustomerFeedbackClassification
# Space status: Sleeping
# File size: 9,983 Bytes

import gradio as gr
import torch
import pickle
import pandas as pd
import os
import io

# Global variables
# Pipeline mapping unpickled by load_model(); remains None until load_model()
# assigns it. The inference code reads its 'model' and 'tokenizer' entries.
loaded_pipeline = None
# Device label printed by load_model() at startup.
model_device = 'cpu'  # Force CPU for Hugging Face Spaces

def load_model():
    """Load the pickled BERT sentiment pipeline and publish it for inference.

    Reads ``sentiment_pipeline.pkl`` from the current working directory with
    an unpickler that remaps pickled torch storages onto the CPU, moves the
    contained model (when the pipeline has a ``'model'`` entry) to the CPU
    and puts it in eval mode. The module-level ``loaded_pipeline`` is only
    assigned after every step succeeded, so a failed load never leaves a
    half-initialised pipeline visible to the prediction code.

    Returns:
        bool: True when the pipeline was loaded; False when the file is
        missing or any loading step raised (the error and traceback are
        printed).
    """
    global loaded_pipeline

    print(f"🖥️ Using device: {model_device}")

    try:
        model_file = 'sentiment_pipeline.pkl'

        if not os.path.exists(model_file):
            print(f"❌ Model file not found: {model_file}")
            return False

        print(f"📦 Loading BERT model from {model_file}...")

        # Custom unpickler for CPU compatibility: tensors pickled on a GPU
        # reference torch.storage._load_from_bytes, which would try to
        # restore them on CUDA. Redirect that hook to a CPU-mapped load.
        class CPUUnpickler(pickle.Unpickler):
            def find_class(self, module, name):
                if module == 'torch.storage' and name == '_load_from_bytes':
                    # weights_only=False mirrors torch's own
                    # _load_from_bytes; the payload is a storage object.
                    return lambda b: torch.load(
                        io.BytesIO(b), map_location='cpu', weights_only=False
                    )
                return super().find_class(module, name)

        # SECURITY NOTE: unpickling executes arbitrary code from the file.
        # Only ship a sentiment_pipeline.pkl that comes from a trusted source.
        with open(model_file, 'rb') as f:
            pipeline = CPUUnpickler(f).load()

        # Move model to CPU
        if 'model' in pipeline:
            pipeline['model'] = pipeline['model'].to('cpu')
            pipeline['model'].eval()

        print("✅ Successfully loaded BERT model")

        if 'best_val_accuracy' in pipeline:
            print(f"🎯 Validation Accuracy: {pipeline['best_val_accuracy']:.4f}")

        # Publish only once the pipeline is fully prepared.
        loaded_pipeline = pipeline
        return True

    except Exception as e:
        print(f"❌ Loading failed: {e}")
        import traceback
        traceback.print_exc()
        return False

def predict_sentiment(text):
    """Classify ``text`` with the loaded pipeline.

    Returns a dict with:
      - 'sentiment': 'negative', 'neutral' or 'positive'
      - 'confidence': the highest softmax probability
      - 'scores': the probability of each of the three classes

    If the pipeline is not loaded, or anything raises while predicting,
    'sentiment' is 'error', every number is 0.0 and an extra 'error' key
    carries the message.
    """

    def failure(reason):
        # Common shape for every non-success outcome.
        return {
            'sentiment': 'error',
            'confidence': 0.0,
            'scores': {'negative': 0.0, 'neutral': 0.0, 'positive': 0.0},
            'error': reason
        }

    if loaded_pipeline is None:
        return failure('Model not loaded')

    try:
        net = loaded_pipeline['model']
        encode = loaded_pipeline['tokenizer']
        train_cfg = loaded_pipeline.get('training_config', {})
        token_limit = train_cfg.get('max_length', 128)

        # Tokenize, then make sure every tensor lives on the CPU.
        encoded = encode(
            text,
            return_tensors='pt',
            truncation=True,
            padding=True,
            max_length=token_limit
        )
        cpu_inputs = {}
        for key, tensor in encoded.items():
            cpu_inputs[key] = tensor.to('cpu')

        # Forward pass without gradient tracking.
        net.eval()
        with torch.no_grad():
            logits = net(**cpu_inputs).logits
            probs = torch.softmax(logits, dim=1)
            top_index = torch.argmax(probs, dim=1).item()
            top_prob = probs.max().item()

        # Class index -> label, in the order used for the scores below.
        labels = ['negative', 'neutral', 'positive']
        winner = labels[top_index]
        row = probs[0]

        return {
            'sentiment': winner,
            'confidence': top_prob,
            'scores': {
                'negative': float(row[0].item()),
                'neutral': float(row[1].item()),
                'positive': float(row[2].item())
            }
        }

    except Exception as exc:
        print(f"Prediction error: {exc}")
        return failure(str(exc))

def analyze_sentiment(text):
    """Run sentiment prediction on ``text`` and format it for the UI.

    Returns a 4-tuple ``(markdown, chart_data, label, status)``:
    a Markdown summary, a DataFrame with 'Sentiment'/'Confidence' columns
    for the bar plot, the title-cased predicted class, and a short status
    line. Every non-success path (model missing, blank input, prediction
    error, unexpected exception) returns an empty DataFrame instead.
    """

    def without_chart(markdown, label, status):
        # All non-success outcomes share an empty chart.
        return (markdown, pd.DataFrame(), label, status)

    if loaded_pipeline is None:
        return without_chart(
            "❌ **Model not loaded!** Please upload sentiment_pipeline.pkl",
            "Error",
            "Model not available"
        )

    if not (text and text.strip()):
        return without_chart(
            "⚠️ **Please enter text**",
            "No input",
            "Enter text above"
        )

    try:
        outcome = predict_sentiment(text.strip())

        if 'error' in outcome:
            reason = outcome['error']
            return without_chart(
                f"❌ **Error:** {reason}",
                "Error",
                f"Error: {reason}"
            )

        label = outcome['sentiment']
        certainty = outcome['confidence']
        per_class = outcome['scores']

        # Data for the bar plot, one row per class.
        chart_data = pd.DataFrame({
            'Sentiment': ['Negative', 'Neutral', 'Positive'],
            'Confidence': [per_class['negative'], per_class['neutral'], per_class['positive']]
        })

        icon = {'negative': '😞', 'neutral': '😐', 'positive': '😊'}[label]
        heading = label.title()

        # Preview shows at most the first 100 characters of the raw input.
        ellipsis = '...' if len(text) > 100 else ''
        preview = f"{text[:100]}{ellipsis}"

        # Whitespace matters: the message starts with a newline, the
        # "Neutral" line ends with two spaces, and the final line is
        # eight spaces.
        message = "\n".join([
            "",
            f"### {icon} **{heading}** Sentiment",
            "",
            f"**Confidence:** {certainty:.1%}",
            "",
            f"**Text:** *\"{preview}\"*",
            "",
            "**Scores:**",
            f"- 😞 Negative: {per_class['negative']:.1%}",
            f"- 😐 Neutral: {per_class['neutral']:.1%}  ",
            f"- 😊 Positive: {per_class['positive']:.1%}",
            "",
            "✅ Bias-corrected BERT model",
            "        ",
        ])

        return message, chart_data, heading, f"✅ {heading} ({certainty:.1%})"

    except Exception as exc:
        return without_chart(
            f"❌ **Error:** {str(exc)}",
            "Error",
            f"Error: {str(exc)}"
        )

# Create Gradio interface
with gr.Blocks(title="BERT Sentiment Analyzer", theme=gr.themes.Soft()) as demo:
    # Placeholder for the results pane; used both initially and by "Clear"
    # so the two states read the same.
    result_placeholder = "*Enter text to see results*"

    # Page banner.
    gr.HTML("""
    <div style="text-align: center; padding: 2rem; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 2rem;">
        <h1>🤖 BERT Sentiment Analyzer</h1>
        <p style="font-size: 1.2em;">Bias-Corrected Sentiment Classification</p>
        <p>✅ Trained with balanced data • No negative bias</p>
    </div>
    """)

    # Filled by update_status() when the page loads.
    model_status = gr.HTML()

    with gr.Row():
        # Left column: text entry and action buttons.
        with gr.Column(scale=3):
            gr.Markdown("### 📝 Input Text")

            text_input = gr.Textbox(
                label="Enter text to analyze",
                placeholder="Example: 'This product is amazing! Great quality and excellent service.'",
                lines=6
            )

            with gr.Row():
                analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")
                clear_btn = gr.Button("🗑️ Clear", size="sm")

        # Right column: the four outputs written by analyze_sentiment().
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Results")

            result_output = gr.Markdown(result_placeholder)

            confidence_plot = gr.BarPlot(
                x="Sentiment",
                y="Confidence",
                title="Confidence Scores",
                width=500,
                height=300
            )

            predicted_class = gr.Textbox(label="Prediction", interactive=False)
            status_display = gr.Textbox(label="Status", interactive=False, value="Ready")

    # Examples - must be after outputs are defined
    with gr.Row():
        gr.Examples(
            examples=[
                ["This product is absolutely amazing! Best purchase ever!"],
                ["I love this so much! Outstanding quality!"],
                ["Excellent customer service and fast delivery!"],
                ["This is terrible! Worst product ever!"],
                ["Completely disappointed. Poor quality."],
                ["Awful experience. Would never buy again!"],
                ["The product is okay. Nothing special but works."],
                ["It's decent. Good value but could be better."],
                ["This is not bad at all"],
                ["Pretty good"],
            ],
            inputs=text_input,
            outputs=[result_output, confidence_plot, predicted_class, status_display],
            fn=analyze_sentiment,
            cache_examples=False
        )

    with gr.Accordion("ℹ️ Model Info", open=False):
        gr.Markdown("""
        ### 🧠 Model
        - **Architecture:** BERT (bert-base-uncased)
        - **Classes:** Negative 😞, Neutral 😐, Positive 😊
        - **Training:** Balanced dataset with class weights
        
        ### 🔧 Features
        - ✅ No negative bias
        - ✅ Balanced training data
        - ✅ Class-weighted loss
        - ✅ CPU optimized
        
        ### 📊 Configuration
        - Epochs: 4
        - Learning Rate: 1e-5
        - Batch Size: 16
        - Max Length: 128 tokens
        """)

    def clear_all():
        """Reset the input, results pane, chart, prediction and status."""
        return "", result_placeholder, pd.DataFrame(), "", "Ready"

    def update_status():
        """Return the HTML banner saying whether the model is loaded."""
        if loaded_pipeline:
            val_acc = loaded_pipeline.get('best_val_accuracy', 'N/A')
            if isinstance(val_acc, str):
                accuracy_text = val_acc
            else:
                try:
                    accuracy_text = f'{val_acc:.2%}'
                except (TypeError, ValueError):
                    # Stored accuracy is not numeric (e.g. None): show it
                    # raw instead of failing the page-load callback.
                    accuracy_text = str(val_acc)
            return f"""<div style="padding: 1rem; background: #d4edda; color: #155724; border-radius: 8px; text-align: center;">
            ✅ Model Loaded | Accuracy: {accuracy_text}</div>"""
        return """<div style="padding: 1rem; background: #f8d7da; color: #721c24; border-radius: 8px; text-align: center;">
        ❌ Model Not Loaded</div>"""

    # Event wiring.
    analyze_btn.click(
        fn=analyze_sentiment,
        inputs=text_input,
        outputs=[result_output, confidence_plot, predicted_class, status_display]
    )

    clear_btn.click(
        fn=clear_all,
        outputs=[text_input, result_output, confidence_plot, predicted_class, status_display]
    )

    # Refresh the model banner each time the page is opened.
    demo.load(fn=update_status, outputs=model_status)

if __name__ == "__main__":
    # Script entry point: the interface is only served once the model
    # has been loaded; otherwise a hint is printed and the script ends.
    print("🚀 Starting BERT Sentiment Analyzer...")
    print("=" * 60)

    if not load_model():
        print("\n❌ FAILED TO LOAD MODEL!")
        print("📋 Ensure sentiment_pipeline.pkl exists")
    else:
        print("\n✅ MODEL READY!")
        print("🌐 Launching interface...")
        demo.launch()