# Source: Hugging Face Space hamxaameer/CustomerFeedbackAnalysis
# Space status when captured: Sleeping
# File size: 20,764 Bytes

import gradio as gr
import torch
import pickle
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
import numpy as np
import os
import io
import traceback

# Module-level state for the model components.
# loaded_model / loaded_tokenizer start as None, are assigned by
# load_trained_model() (via `global`), and are read by the prediction handler
# and the status/debug helpers of the Gradio UI.
loaded_model = None
loaded_tokenizer = None
# Used for display in logs and the UI; the loading code itself moves models
# with the literal 'cpu'.
model_device = torch.device('cpu')  # Always use CPU for Hugging Face Spaces

def safe_pickle_load(file_path):
    """Unpickle ``file_path``, remapping pickled torch storages onto the CPU.

    The unpickler intercepts lookups of ``torch.storage._load_from_bytes`` and
    substitutes a loader that calls ``torch.load(..., map_location='cpu')``.
    Every other class/function lookup is delegated to the standard
    ``pickle.Unpickler`` behaviour.

    NOTE: "safe" refers to device mapping only. This is still a regular
    pickle load, so the file must come from a trusted source.

    Args:
        file_path: Path of the pickle file to read (opened in binary mode).

    Returns:
        The object stored in the pickle file.
    """
    def _storage_from_bytes(raw_bytes):
        # Same job as torch's own helper, but pinned to the CPU.
        return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

    class _CpuMappedUnpickler(pickle.Unpickler):
        def find_class(self, module, name):
            if (module, name) == ('torch.storage', '_load_from_bytes'):
                return _storage_from_bytes
            return super().find_class(module, name)

    with open(file_path, 'rb') as handle:
        unpickler = _CpuMappedUnpickler(handle)
        return unpickler.load()

def load_trained_model():
    """Load a sentiment model and tokenizer into the module globals, on CPU.

    Sources are tried in order until one succeeds:

    1. ``sentiment_pipeline.pkl`` -- first through ``safe_pickle_load``, then
       through plain ``pickle.load`` while ``torch.load`` is temporarily
       wrapped to force ``map_location='cpu'``.
    2. The ``bert_sentiment_model`` directory via ``from_pretrained``.
    3. The first loadable directory among ``./model``, ``./trained_model``
       and ``./fine_tuned_model``.
    4. ``bert-base-uncased`` with ``num_labels=3`` as a fallback that is
       NOT fine-tuned.

    Once a model/tokenizer pair is available, a single test sentence is pushed
    through it as a smoke test. Progress and errors are printed; exceptions
    are not propagated to the caller.

    Side effects:
        Assigns the module-level ``loaded_model`` and ``loaded_tokenizer``.

    Returns:
        bool: True if a model was loaded and the smoke test succeeded,
        False otherwise.
    """
    global loaded_model, loaded_tokenizer

    print("🚀 Starting model loading process...")
    print(f"🖥️ Target device: {model_device}")
    print(f"📁 Current directory: {os.getcwd()}")
    print(f"📋 Files in directory: {os.listdir('.')}")

    loading_success = False

    try:
        # Method 1: Try loading from pickle with advanced CPU mapping
        # NOTE(security): unpickling executes code embedded in the file; only
        # ship a sentiment_pipeline.pkl that you produced yourself.
        if os.path.exists('sentiment_pipeline.pkl'):
            print("\n📦 Method 1: Loading from sentiment_pipeline.pkl...")
            try:
                # First try the safe pickle loader
                pipeline = safe_pickle_load('sentiment_pipeline.pkl')
                loaded_model = pipeline['model']
                loaded_tokenizer = pipeline['tokenizer']

                # Force everything to CPU
                loaded_model = loaded_model.to('cpu')
                loaded_model.eval()

                print("✅ Successfully loaded from pickle with safe CPU mapping!")
                loading_success = True

            except Exception as e:
                print(f"❌ Safe pickle loading failed: {e}")

                # Fallback: Try with torch.load override
                try:
                    print("🔄 Trying torch.load override method...")

                    # Override torch.load temporarily so every nested call
                    # made during unpickling maps tensors to the CPU.
                    original_torch_load = torch.load
                    torch.load = lambda *args, **kwargs: original_torch_load(*args, **{**kwargs, 'map_location': 'cpu'})
                    try:
                        with open('sentiment_pipeline.pkl', 'rb') as f:
                            pipeline = pickle.load(f)
                    finally:
                        # Always undo the monkeypatch -- even when unpickling
                        # raises -- so later torch.load calls in this process
                        # are not silently altered.
                        torch.load = original_torch_load

                    loaded_model = pipeline['model']
                    loaded_tokenizer = pipeline['tokenizer']

                    # Ensure CPU
                    loaded_model = loaded_model.to('cpu')
                    loaded_model.eval()

                    print("✅ Successfully loaded with torch.load override!")
                    loading_success = True

                except Exception as e2:
                    print(f"❌ Torch.load override also failed: {e2}")

        # Method 2: Try loading from HuggingFace format
        if not loading_success and os.path.exists('bert_sentiment_model'):
            print("\n🤗 Method 2: Loading from HuggingFace format...")
            try:
                loaded_model = BertForSequenceClassification.from_pretrained('bert_sentiment_model')
                loaded_tokenizer = BertTokenizer.from_pretrained('bert_sentiment_model')
                loaded_model = loaded_model.to('cpu')
                loaded_model.eval()
                print("✅ Successfully loaded from HuggingFace format!")
                loading_success = True
            except Exception as e:
                print(f"❌ HuggingFace format loading failed: {e}")

        # Method 3: Try loading individual model files
        if not loading_success:
            for potential_path in ['./model', './trained_model', './fine_tuned_model']:
                if os.path.exists(potential_path):
                    print(f"\n🔍 Method 3: Trying to load from {potential_path}...")
                    try:
                        loaded_model = BertForSequenceClassification.from_pretrained(potential_path)
                        loaded_tokenizer = BertTokenizer.from_pretrained(potential_path)
                        loaded_model = loaded_model.to('cpu')
                        loaded_model.eval()
                        print(f"✅ Successfully loaded from {potential_path}!")
                        loading_success = True
                        break
                    except Exception as e:
                        print(f"❌ Loading from {potential_path} failed: {e}")

        # Method 4: Load base BERT model as fallback
        if not loading_success:
            print("\n⚠️ Method 4: Loading base BERT model as fallback...")
            try:
                loaded_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
                loaded_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
                loaded_model = loaded_model.to('cpu')
                loaded_model.eval()
                print("✅ Base BERT model loaded successfully!")
                print("⚠️ NOTE: This is NOT your fine-tuned model!")
                loading_success = True
            except Exception as e:
                print(f"❌ Even base BERT loading failed: {e}")
                return False

        # Verify model is working
        if loaded_model is not None and loaded_tokenizer is not None:
            print("\n🧪 Testing model functionality...")
            try:
                test_input = "This is a test sentence"
                inputs = loaded_tokenizer(test_input, return_tensors='pt', truncation=True, padding=True, max_length=128)
                with torch.no_grad():
                    outputs = loaded_model(**inputs)
                    probabilities = torch.softmax(outputs.logits, dim=1)

                print("✅ Model test prediction successful!")
                print(f"📊 Model parameters: {sum(p.numel() for p in loaded_model.parameters()):,}")
                print(f"🎯 Test output shape: {outputs.logits.shape}")
                print(f"📈 Test probabilities: {probabilities.squeeze().tolist()}")
                return True

            except Exception as e:
                print(f"❌ Model test failed: {e}")
                print(f"📋 Full error: {traceback.format_exc()}")
                return False
        else:
            print("❌ Model or tokenizer is None after loading")
            return False

    except Exception as e:
        # Last-resort boundary: report and signal failure instead of crashing
        # the app before the UI can start.
        print(f"❌ Critical error in model loading: {e}")
        print(f"📋 Full traceback: {traceback.format_exc()}")
        return False

def predict_sentiment_with_details(text):
    """Classify ``text`` and return the four values the UI outputs expect.

    Args:
        text: Raw text from the input box; may be empty or None.

    Returns:
        tuple: ``(markdown_report, confidence_frame, predicted_label,
        status_line)``. ``confidence_frame`` is a DataFrame with the columns
        ``Sentiment`` and ``Confidence`` on success and an empty DataFrame
        when the model is missing, the input is blank, or prediction fails.
        Failures are reported through the returned strings, not raised.
    """
    # Guard 1: nothing to predict with.
    model_missing = loaded_model is None or loaded_tokenizer is None
    if model_missing:
        return (
            "❌ **ERROR: Model not loaded!**\n\nThe fine-tuned model could not be loaded. Using base BERT instead.\n\n**Possible causes:**\n- Model file not uploaded to Hugging Face Space\n- Device compatibility issues\n- File corruption\n\n**Solutions:**\n- Make sure `sentiment_pipeline.pkl` is uploaded\n- Check Hugging Face Space logs\n- Try re-uploading the model file",
            pd.DataFrame(),
            "Error: No model",
            "Model not available - check upload"
        )

    # Guard 2: nothing to predict on (None, empty, or whitespace only).
    if not (text and text.strip()):
        return (
            "⚠️ **Please enter some text to analyze**",
            pd.DataFrame(),
            "No input",
            "Enter text above"
        )

    try:
        clean_text = text.strip()
        log_suffix = '...' if len(clean_text) > 50 else ''
        print(f"🔍 Analyzing: {clean_text[:50]}{log_suffix}")

        # Tokenize, then pin every tensor to the CPU before inference.
        encoded = loaded_tokenizer(
            clean_text,
            return_tensors='pt',
            truncation=True,
            padding=True,
            max_length=128
        )
        inputs = {key: tensor.to('cpu') for key, tensor in encoded.items()}

        with torch.no_grad():
            outputs = loaded_model(**inputs)
            probabilities = torch.softmax(outputs.logits, dim=1)
            prediction = torch.argmax(probabilities, dim=1).item()
            confidence = probabilities.max().item()

        # Class index -> human-readable label.
        sentiment_names = ('Negative', 'Neutral', 'Positive')
        label_mapping = dict(enumerate(sentiment_names))
        predicted_sentiment = label_mapping[prediction]

        # Per-class probabilities of the single input row, in label order.
        negative_score = float(probabilities[0][0].item())
        neutral_score = float(probabilities[0][1].item())
        positive_score = float(probabilities[0][2].item())
        confidence_data = pd.DataFrame({
            'Sentiment': list(sentiment_names),
            'Confidence': [negative_score, neutral_score, positive_score]
        })

        emoji_map = {'Negative': '😞', 'Neutral': '😐', 'Positive': '😊'}
        emoji = emoji_map[predicted_sentiment]

        # The model type is inferred from the presence of the pickle file.
        if os.path.exists('sentiment_pipeline.pkl'):
            model_type = "Fine-tuned BERT"
        else:
            model_type = "Base BERT (not fine-tuned)"

        excerpt_suffix = '...' if len(clean_text) > 100 else ''
        # Built line by line so the significant whitespace (the Markdown hard
        # break after the Neutral row, the trailing indent) stays visible.
        report_lines = [
            "",
            f"### {emoji} **{predicted_sentiment}** Sentiment Detected",
            "",
            f"**Confidence Score:** {confidence:.1%}",
            "",
            f"**Input Text:** *\"{clean_text[:100]}{excerpt_suffix}\"*",
            "",
            "**Analysis Details:**",
            f"- **Negative:** {negative_score:.1%}",
            f"- **Neutral:** {neutral_score:.1%}  ",
            f"- **Positive:** {positive_score:.1%}",
            "",
            f"**Model Type:** {model_type}",
            "**Status:** ✅ Prediction completed successfully",
            "        ",
        ]
        result_message = "\n".join(report_lines)

        status_message = f"✅ Analysis complete - {predicted_sentiment} sentiment ({confidence:.1%} confidence)"

        return result_message, confidence_data, predicted_sentiment, status_message

    except Exception as e:
        trace_text = traceback.format_exc()
        error_msg = f"❌ **Prediction Error:** {str(e)}\n\nDetailed error information:\n```\n{trace_text}\n```"
        print(f"Prediction error: {e}")
        print(f"Full traceback: {trace_text}")
        return error_msg, pd.DataFrame(), "Error", f"Error: {str(e)}"

def create_gradio_interface():
    """Build the Gradio Blocks UI for the sentiment analyzer.

    The layout consists of a header, a model-status banner, a collapsible
    debug panel, an input column (text box, analyze/clear buttons, example
    texts), a results column (Markdown report, bar plot, predicted class,
    status line) and a collapsible setup guide.

    The analyze button calls ``predict_sentiment_with_details``; the status
    banner and debug panel are filled in when the page loads.

    Returns:
        gr.Blocks: The assembled (not yet launched) interface.
    """

    # Custom CSS for better styling
    css = """
    .model-status {
        padding: 1rem;
        border-radius: 8px;
        margin-bottom: 1rem;
        text-align: center;
        font-weight: bold;
    }
    .status-success {
        background-color: #d4edda;
        color: #155724;
        border: 1px solid #c3e6cb;
    }
    .status-warning {
        background-color: #fff3cd;
        color: #856404;
        border: 1px solid #ffeaa7;
    }
    .status-error {
        background-color: #f8d7da;
        color: #721c24;
        border: 1px solid #f5c6cb;
    }
    """

    with gr.Blocks(css=css, title="BERT Sentiment Analyzer", theme=gr.themes.Soft()) as demo:

        # Header
        gr.HTML("""
        <div style="text-align: center; padding: 2rem; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 2rem;">
            <h1>🤖 BERT Sentiment Classification</h1>
            <p>Advanced AI-powered sentiment analysis using BERT</p>
            <p><strong>🌍 Permanently hosted on Hugging Face Spaces</strong></p>
        </div>
        """)

        # Model status indicator
        model_status = gr.HTML()

        # Debug information (collapsible)
        with gr.Accordion("🔧 Debug Information", open=False):
            debug_info = gr.HTML()

        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown("### 📝 Enter Text for Sentiment Analysis")

                text_input = gr.Textbox(
                    label="Input Text",
                    placeholder="Enter your text here... (e.g., 'This product is amazing! Great quality and fast delivery.')",
                    lines=6,
                    max_lines=20,
                    value=""
                )

                with gr.Row():
                    analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
                    clear_btn = gr.Button("🗑️ Clear", size="sm")

                gr.Markdown("### 💡 Example Texts to Try:")
                # Clicking an example copies it into the input box.
                gr.Examples(
                    examples=[
                        ["This product exceeded all my expectations! Outstanding quality and excellent customer service."],
                        ["I'm completely disappointed with this purchase. Poor quality and terrible customer support."],
                        ["The product is decent. It works as described but nothing extraordinary."],
                        ["Best purchase I've made this year! Highly recommend to everyone."],
                        ["Absolutely horrible experience. Would never buy from this company again."],
                        ["It's okay, good value for the price but could be improved."]
                    ],
                    inputs=text_input,
                    label=None
                )

            with gr.Column(scale=2):
                gr.Markdown("### 📊 Analysis Results")

                result_output = gr.Markdown(
                    value="*Enter text and click 'Analyze Sentiment' to see results*"
                )

                confidence_plot = gr.BarPlot(
                    x="Sentiment",
                    y="Confidence",
                    title="Confidence Scores by Sentiment Class",
                    x_title="Sentiment",
                    y_title="Confidence Score",
                    width=500,
                    height=300,
                    container=True
                )

                predicted_class = gr.Textbox(
                    label="Predicted Sentiment Class",
                    interactive=False,
                    value=""
                )

                status_display = gr.Textbox(
                    label="Analysis Status",
                    interactive=False,
                    value="Ready for analysis"
                )

        # Model Information Section
        with gr.Accordion("🔍 Model Information & Setup Guide", open=False):
            gr.Markdown(f"""
            ### 🧠 Model Architecture
            - **Base Model:** BERT (bert-base-uncased)
            - **Task:** Multi-class sentiment classification
            - **Classes:** Negative 😞, Neutral 😐, Positive 😊
            - **Max Sequence Length:** 128 tokens
            - **Device:** {model_device}
            
            ### 📁 Required Files for Fine-tuned Model
            To use your fine-tuned model, make sure these files are uploaded to your Hugging Face Space:
            
            1. **sentiment_pipeline.pkl** - Your trained model (REQUIRED)
            2. **app.py** - This application file
            3. **requirements.txt** - Python dependencies
            4. **README.md** - Space configuration
            
            ### 🚀 Hugging Face Space Setup Instructions
            
            1. **Create New Space:**
               - Go to [Hugging Face Spaces](https://huggingface.co/spaces)
               - Click "Create new Space"
               - Choose **Gradio** as SDK
               - Select **CPU** as hardware (recommended for this model)
            
            2. **Upload Files:**
               - Upload `sentiment_pipeline.pkl` (your trained model)
               - Upload `app.py` (this file)
               - Upload `requirements.txt`
               - Upload `README.md`
            
            3. **Space Configuration:**
               - Make sure your `README.md` has this header:
               ```yaml
               ---
               title: BERT Sentiment Analyzer
               emoji: 🤖
               colorFrom: blue
               colorTo: purple
               sdk: gradio
               sdk_version: 3.40.0
               app_file: app.py
               pinned: false
               ---
               ```
            
            ### ⚠️ Troubleshooting
            
            **If you see "Model Not Loaded":**
            - Check if `sentiment_pipeline.pkl` is uploaded
            - Verify file size (should be ~400MB+)
            - Check Space logs for errors
            - Make sure you selected **CPU** hardware
            
            **If predictions seem wrong:**
            - The app might be using base BERT instead of your fine-tuned model
            - Re-upload `sentiment_pipeline.pkl`
            - Check the model status indicator above
            """)

        # Event handlers
        def clear_inputs():
            """Reset the input box and all four result widgets."""
            return "", "*Enter text to see analysis*", pd.DataFrame(), "", "Ready for analysis"

        def update_model_status():
            """Return the HTML banner describing the current model state.

            NOTE(review): "fine-tuned" is inferred from the pickle file being
            present on disk, not from which loading method succeeded.
            """
            if loaded_model is not None and loaded_tokenizer is not None:
                if os.path.exists('sentiment_pipeline.pkl'):
                    return """<div class="model-status status-success">✅ Fine-tuned Model Loaded Successfully - Ready for Analysis!</div>"""
                else:
                    return """<div class="model-status status-warning">⚠️ Base BERT Model Loaded - Upload sentiment_pipeline.pkl for fine-tuned predictions</div>"""
            else:
                return """<div class="model-status status-error">❌ Model Loading Failed - Check files and logs</div>"""

        def get_debug_info():
            """Return an HTML snippet with runtime diagnostics."""
            # Local imports keep this block self-contained.
            import html
            import platform

            # Paths and file names are interpolated into HTML, so escape them.
            current_dir = html.escape(os.getcwd())
            files_present = html.escape(', '.join(os.listdir('.')))

            debug_html = f"""
            <div style="font-family: monospace; background: #f8f9fa; padding: 1rem; border-radius: 5px;">
                <strong>Debug Information:</strong><br>
                📁 Current directory: {current_dir}<br>
                📋 Files present: {files_present}<br>
                🤖 Model loaded: {loaded_model is not None}<br>
                🔤 Tokenizer loaded: {loaded_tokenizer is not None}<br>
                💾 Pickle file exists: {os.path.exists('sentiment_pipeline.pkl')}<br>
                🖥️ Device: {model_device}<br>
                🐍 Python version: {platform.python_version()}<br>
                🔥 PyTorch version: {torch.__version__}<br>
            </div>
            """
            return debug_html

        # Connect events
        analyze_btn.click(
            fn=predict_sentiment_with_details,
            inputs=text_input,
            outputs=[result_output, confidence_plot, predicted_class, status_display]
        )

        clear_btn.click(
            fn=clear_inputs,
            outputs=[text_input, result_output, confidence_plot, predicted_class, status_display]
        )

        # Update status on load
        demo.load(
            fn=update_model_status,
            outputs=model_status
        )

        demo.load(
            fn=get_debug_info,
            outputs=debug_info
        )

    return demo

# Script entry point: load the model, report the outcome, then serve the UI.
if __name__ == "__main__":
    print("🚀 Starting BERT Sentiment Analyzer...")
    print("=" * 60)

    # Populate the module-level model/tokenizer before building the UI.
    model_loaded = load_trained_model()

    print("\n" + "=" * 60)
    if not model_loaded:
        print("❌ Model loading encountered issues - check logs above")
    else:
        print("🎉 MODEL LOADING COMPLETED!")
        # The pickle's presence on disk is used as the "fine-tuned" signal.
        fine_tuned_available = os.path.exists('sentiment_pipeline.pkl')
        if fine_tuned_available:
            print("✅ Fine-tuned model ready for predictions!")
        else:
            print("⚠️ Using base BERT model (fine-tuned model not found)")

    # The interface is created even when loading failed, so the page can
    # show the error state.
    print("✅ Creating Gradio interface...")
    demo = create_gradio_interface()

    print("🌐 Launching web interface...")
    print("=" * 60)

    # Bind to all interfaces on Gradio's default port; no public share link
    # is created because the Space URL is used instead.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )