"""Gradio app for BERT-based sentiment classification on Hugging Face Spaces.

Loads a fine-tuned BERT model (preferably from ``sentiment_pipeline.pkl``)
with several CPU-compatible fallback strategies, then serves an interactive
Gradio UI for 3-class sentiment prediction (Negative / Neutral / Positive).
"""

import gradio as gr
import torch
import pickle
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
import numpy as np
import os
import io
import sys
import traceback

# Global variables for model components
loaded_model = None
loaded_tokenizer = None
model_device = torch.device('cpu')  # Always use CPU for Hugging Face Spaces


def safe_pickle_load(file_path):
    """Safely load a pickle file, forcing all torch tensors onto the CPU.

    Overrides torch's storage deserialization hook so checkpoints saved on a
    GPU machine can be unpickled on a CPU-only Space.

    NOTE(security): ``pickle`` executes arbitrary code during load — only use
    this on model files you created and uploaded yourself.

    Args:
        file_path: Path to the pickle file.

    Returns:
        The unpickled object (here: a dict with 'model' and 'tokenizer').
    """
    class CPUUnpickler(pickle.Unpickler):
        def find_class(self, module, name):
            # torch serializes tensor storages via this private hook; route it
            # through torch.load(map_location='cpu') so GPU tensors deserialize.
            if module == 'torch.storage' and name == '_load_from_bytes':
                return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
            else:
                return super().find_class(module, name)

    with open(file_path, 'rb') as f:
        return CPUUnpickler(f).load()


def load_trained_model():
    """Load the trained BERT model with comprehensive CPU compatibility.

    Tries four strategies in order (pickle with CPU unpickler, pickle with a
    temporary ``torch.load`` override, HuggingFace saved-model directories,
    and finally base ``bert-base-uncased`` as a last resort), then runs a
    smoke-test prediction.

    Returns:
        True if a working model + tokenizer pair is loaded, False otherwise.
    """
    global loaded_model, loaded_tokenizer

    print("🚀 Starting model loading process...")
    print(f"🖥️ Target device: {model_device}")
    print(f"📁 Current directory: {os.getcwd()}")
    print(f"📋 Files in directory: {os.listdir('.')}")

    loading_success = False

    try:
        # Method 1: Try loading from pickle with advanced CPU mapping
        if os.path.exists('sentiment_pipeline.pkl'):
            print("\n📦 Method 1: Loading from sentiment_pipeline.pkl...")
            try:
                # First try the safe pickle loader
                pipeline = safe_pickle_load('sentiment_pipeline.pkl')
                loaded_model = pipeline['model']
                loaded_tokenizer = pipeline['tokenizer']
                # Force everything to CPU
                loaded_model = loaded_model.to('cpu')
                loaded_model.eval()
                print("✅ Successfully loaded from pickle with safe CPU mapping!")
                loading_success = True
            except Exception as e:
                print(f"❌ Safe pickle loading failed: {e}")
                # Fallback: Try with torch.load override
                try:
                    print("🔄 Trying torch.load override method...")
                    # Temporarily monkey-patch torch.load to force CPU mapping.
                    original_torch_load = torch.load
                    torch.load = lambda *args, **kwargs: original_torch_load(
                        *args, **{**kwargs, 'map_location': 'cpu'}
                    )
                    try:
                        with open('sentiment_pipeline.pkl', 'rb') as f:
                            pipeline = pickle.load(f)
                    finally:
                        # BUGFIX: always restore the original torch.load, even
                        # when pickle.load raises — otherwise the global stays
                        # patched for the rest of the process.
                        torch.load = original_torch_load
                    loaded_model = pipeline['model']
                    loaded_tokenizer = pipeline['tokenizer']
                    # Ensure CPU
                    loaded_model = loaded_model.to('cpu')
                    loaded_model.eval()
                    print("✅ Successfully loaded with torch.load override!")
                    loading_success = True
                except Exception as e2:
                    print(f"❌ Torch.load override also failed: {e2}")

        # Method 2: Try loading from HuggingFace format
        if not loading_success and os.path.exists('bert_sentiment_model'):
            print("\n🤗 Method 2: Loading from HuggingFace format...")
            try:
                loaded_model = BertForSequenceClassification.from_pretrained('bert_sentiment_model')
                loaded_tokenizer = BertTokenizer.from_pretrained('bert_sentiment_model')
                loaded_model = loaded_model.to('cpu')
                loaded_model.eval()
                print("✅ Successfully loaded from HuggingFace format!")
                loading_success = True
            except Exception as e:
                print(f"❌ HuggingFace format loading failed: {e}")

        # Method 3: Try loading individual model files
        if not loading_success:
            for potential_path in ['./model', './trained_model', './fine_tuned_model']:
                if os.path.exists(potential_path):
                    print(f"\n🔍 Method 3: Trying to load from {potential_path}...")
                    try:
                        loaded_model = BertForSequenceClassification.from_pretrained(potential_path)
                        loaded_tokenizer = BertTokenizer.from_pretrained(potential_path)
                        loaded_model = loaded_model.to('cpu')
                        loaded_model.eval()
                        print(f"✅ Successfully loaded from {potential_path}!")
                        loading_success = True
                        break
                    except Exception as e:
                        print(f"❌ Loading from {potential_path} failed: {e}")

        # Method 4: Load base BERT model as fallback (NOT fine-tuned)
        if not loading_success:
            print("\n⚠️ Method 4: Loading base BERT model as fallback...")
            try:
                loaded_model = BertForSequenceClassification.from_pretrained(
                    'bert-base-uncased', num_labels=3
                )
                loaded_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
                loaded_model = loaded_model.to('cpu')
                loaded_model.eval()
                print("✅ Base BERT model loaded successfully!")
                print("⚠️ NOTE: This is NOT your fine-tuned model!")
                loading_success = True
            except Exception as e:
                print(f"❌ Even base BERT loading failed: {e}")
                return False

        # Verify model is working with a smoke-test prediction
        if loaded_model is not None and loaded_tokenizer is not None:
            print("\n🧪 Testing model functionality...")
            try:
                test_input = "This is a test sentence"
                inputs = loaded_tokenizer(
                    test_input,
                    return_tensors='pt',
                    truncation=True,
                    padding=True,
                    max_length=128,
                )
                with torch.no_grad():
                    outputs = loaded_model(**inputs)
                    probabilities = torch.softmax(outputs.logits, dim=1)
                print("✅ Model test prediction successful!")
                print(f"📊 Model parameters: {sum(p.numel() for p in loaded_model.parameters()):,}")
                print(f"🎯 Test output shape: {outputs.logits.shape}")
                print(f"📈 Test probabilities: {probabilities.squeeze().tolist()}")
                return True
            except Exception as e:
                print(f"❌ Model test failed: {e}")
                print(f"📋 Full error: {traceback.format_exc()}")
                return False
        else:
            print("❌ Model or tokenizer is None after loading")
            return False

    except Exception as e:
        print(f"❌ Critical error in model loading: {e}")
        print(f"📋 Full traceback: {traceback.format_exc()}")
        return False


def predict_sentiment_with_details(text):
    """Predict sentiment with detailed output and error handling.

    Args:
        text: Raw user input string.

    Returns:
        A 4-tuple of (markdown result message, confidence DataFrame for the
        bar plot, predicted class label, short status string).
    """
    # Check if model is loaded
    if loaded_model is None or loaded_tokenizer is None:
        return (
            "❌ **ERROR: Model not loaded!**\n\n"
            "The fine-tuned model could not be loaded. Using base BERT instead.\n\n"
            "**Possible causes:**\n"
            "- Model file not uploaded to Hugging Face Space\n"
            "- Device compatibility issues\n"
            "- File corruption\n\n"
            "**Solutions:**\n"
            "- Make sure `sentiment_pipeline.pkl` is uploaded\n"
            "- Check Hugging Face Space logs\n"
            "- Try re-uploading the model file",
            pd.DataFrame(),
            "Error: No model",
            "Model not available - check upload",
        )

    # Check if text is provided
    if not text or not text.strip():
        return (
            "⚠️ **Please enter some text to analyze**",
            pd.DataFrame(),
            "No input",
            "Enter text above",
        )

    try:
        # Clean and prepare text
        clean_text = text.strip()
        print(f"🔍 Analyzing: {clean_text[:50]}{'...' if len(clean_text) > 50 else ''}")

        # Tokenize input (ensure CPU)
        inputs = loaded_tokenizer(
            clean_text,
            return_tensors='pt',
            truncation=True,
            padding=True,
            max_length=128,
        )
        # Move inputs to CPU explicitly
        inputs = {k: v.to('cpu') for k, v in inputs.items()}

        # Get prediction (all on CPU)
        with torch.no_grad():
            outputs = loaded_model(**inputs)
            probabilities = torch.softmax(outputs.logits, dim=1)
            prediction = torch.argmax(probabilities, dim=1).item()
            confidence = probabilities.max().item()

        # Map class indices to human-readable labels
        label_mapping = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
        predicted_sentiment = label_mapping[prediction]

        # Create confidence scores for visualization
        confidence_data = pd.DataFrame({
            'Sentiment': ['Negative', 'Neutral', 'Positive'],
            'Confidence': [
                float(probabilities[0][0].item()),
                float(probabilities[0][1].item()),
                float(probabilities[0][2].item()),
            ],
        })

        # Create detailed result message
        emoji_map = {'Negative': '😞', 'Neutral': '😐', 'Positive': '😊'}
        emoji = emoji_map[predicted_sentiment]

        # NOTE(review): this only checks that the pickle file exists, not that
        # it actually loaded — Method 2-4 fallbacks would still report
        # "Fine-tuned BERT" here. Kept as-is to preserve behavior.
        model_type = (
            "Fine-tuned BERT"
            if os.path.exists('sentiment_pipeline.pkl')
            else "Base BERT (not fine-tuned)"
        )

        result_message = f"""
### {emoji} **{predicted_sentiment}** Sentiment Detected

**Confidence Score:** {confidence:.1%}

**Input Text:** *"{clean_text[:100]}{'...' if len(clean_text) > 100 else ''}"*

**Analysis Details:**
- **Negative:** {probabilities[0][0].item():.1%}
- **Neutral:** {probabilities[0][1].item():.1%}
- **Positive:** {probabilities[0][2].item():.1%}

**Model Type:** {model_type}

**Status:** ✅ Prediction completed successfully
"""

        status_message = (
            f"✅ Analysis complete - {predicted_sentiment} sentiment "
            f"({confidence:.1%} confidence)"
        )

        return result_message, confidence_data, predicted_sentiment, status_message

    except Exception as e:
        error_msg = (
            f"❌ **Prediction Error:** {str(e)}\n\n"
            f"Detailed error information:\n```\n{traceback.format_exc()}\n```"
        )
        print(f"Prediction error: {e}")
        print(f"Full traceback: {traceback.format_exc()}")
        return error_msg, pd.DataFrame(), "Error", f"Error: {str(e)}"


def create_gradio_interface():
    """Create enhanced Gradio interface with comprehensive model status.

    Returns:
        The constructed ``gr.Blocks`` demo (not yet launched).
    """
    # Custom CSS for the model-status banner
    css = """
    .model-status { padding: 1rem; border-radius: 8px; margin-bottom: 1rem; text-align: center; font-weight: bold; }
    .status-success { background-color: #d4edda; color: #155724; border: 1px solid #c3e6cb; }
    .status-warning { background-color: #fff3cd; color: #856404; border: 1px solid #ffeaa7; }
    .status-error { background-color: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; }
    """

    with gr.Blocks(css=css, title="BERT Sentiment Analyzer", theme=gr.themes.Soft()) as demo:
        # Header
        gr.HTML("""
        <div style="text-align: center;">
            <h1>🤖 BERT Sentiment Classification</h1>
            <p>Advanced AI-powered sentiment analysis using BERT</p>
            <p>🌐 Permanently hosted on Hugging Face Spaces</p>
        </div>
        """)

        # Model status indicator (filled in on load)
        model_status = gr.HTML()

        # Debug information (collapsible)
        with gr.Accordion("🔧 Debug Information", open=False):
            debug_info = gr.HTML()

        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown("### 📝 Enter Text for Sentiment Analysis")
                text_input = gr.Textbox(
                    label="Input Text",
                    placeholder="Enter your text here... (e.g., 'This product is amazing! Great quality and fast delivery.')",
                    lines=6,
                    max_lines=20,
                    value="",
                )
                with gr.Row():
                    analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
                    clear_btn = gr.Button("🗑️ Clear", size="sm")

                gr.Markdown("### 💡 Example Texts to Try:")
                examples = gr.Examples(
                    examples=[
                        ["This product exceeded all my expectations! Outstanding quality and excellent customer service."],
                        ["I'm completely disappointed with this purchase. Poor quality and terrible customer support."],
                        ["The product is decent. It works as described but nothing extraordinary."],
                        ["Best purchase I've made this year! Highly recommend to everyone."],
                        ["Absolutely horrible experience. Would never buy from this company again."],
                        ["It's okay, good value for the price but could be improved."],
                    ],
                    inputs=text_input,
                    label=None,
                )

            with gr.Column(scale=2):
                gr.Markdown("### 📊 Analysis Results")
                result_output = gr.Markdown(
                    value="*Enter text and click 'Analyze Sentiment' to see results*"
                )
                confidence_plot = gr.BarPlot(
                    x="Sentiment",
                    y="Confidence",
                    title="Confidence Scores by Sentiment Class",
                    x_title="Sentiment",
                    y_title="Confidence Score",
                    width=500,
                    height=300,
                    container=True,
                )
                predicted_class = gr.Textbox(
                    label="Predicted Sentiment Class",
                    interactive=False,
                    value="",
                )
                status_display = gr.Textbox(
                    label="Analysis Status",
                    interactive=False,
                    value="Ready for analysis",
                )

        # Model Information Section
        with gr.Accordion("🔍 Model Information & Setup Guide", open=False):
            gr.Markdown(f"""
### 🧠 Model Architecture
- **Base Model:** BERT (bert-base-uncased)
- **Task:** Multi-class sentiment classification
- **Classes:** Negative 😞, Neutral 😐, Positive 😊
- **Max Sequence Length:** 128 tokens
- **Device:** {model_device}

### 📁 Required Files for Fine-tuned Model
To use your fine-tuned model, make sure these files are uploaded to your Hugging Face Space:
1. **sentiment_pipeline.pkl** - Your trained model (REQUIRED)
2. **app.py** - This application file
3. **requirements.txt** - Python dependencies
4. **README.md** - Space configuration

### 🚀 Hugging Face Space Setup Instructions
1. **Create New Space:**
   - Go to [Hugging Face Spaces](https://huggingface.co/spaces)
   - Click "Create new Space"
   - Choose **Gradio** as SDK
   - Select **CPU** as hardware (recommended for this model)
2. **Upload Files:**
   - Upload `sentiment_pipeline.pkl` (your trained model)
   - Upload `app.py` (this file)
   - Upload `requirements.txt`
   - Upload `README.md`
3. **Space Configuration:**
   - Make sure your `README.md` has this header:
   ```yaml
   ---
   title: BERT Sentiment Analyzer
   emoji: 🤖
   colorFrom: blue
   colorTo: purple
   sdk: gradio
   sdk_version: 3.40.0
   app_file: app.py
   pinned: false
   ---
   ```

### ⚠️ Troubleshooting
**If you see "Model Not Loaded":**
- Check if `sentiment_pipeline.pkl` is uploaded
- Verify file size (should be ~400MB+)
- Check Space logs for errors
- Make sure you selected **CPU** hardware

**If predictions seem wrong:**
- The app might be using base BERT instead of your fine-tuned model
- Re-upload `sentiment_pipeline.pkl`
- Check the model status indicator above
""")

        # Event handlers
        def clear_inputs():
            """Reset all input/output widgets to their initial state."""
            return "", "*Enter text to see analysis*", pd.DataFrame(), "", "Ready for analysis"

        def update_model_status():
            """Build the status banner HTML from the current loading state."""
            if loaded_model is not None and loaded_tokenizer is not None:
                if os.path.exists('sentiment_pipeline.pkl'):
                    return """
                    <div class="model-status status-success">
                    ✅ Fine-tuned Model Loaded Successfully - Ready for Analysis!
                    </div>
                    """
                else:
                    return """
                    <div class="model-status status-warning">
                    ⚠️ Base BERT Model Loaded - Upload sentiment_pipeline.pkl for fine-tuned predictions
                    </div>
                    """
            else:
                return """
                <div class="model-status status-error">
                ❌ Model Loading Failed - Check files and logs
                </div>
                """

        def get_debug_info():
            """Build the HTML shown in the Debug Information accordion."""
            # BUGFIX: the Python version line previously printed
            # torch.__version__; report sys.version for Python and the torch
            # version under its own label.
            debug_html = f"""
            <div>
            <pre>
Debug Information:
📁 Current directory: {os.getcwd()}
📋 Files present: {', '.join(os.listdir('.'))}
🤖 Model loaded: {loaded_model is not None}
🔤 Tokenizer loaded: {loaded_tokenizer is not None}
💾 Pickle file exists: {os.path.exists('sentiment_pipeline.pkl')}
🖥️ Device: {model_device}
🐍 Python version: {sys.version.split()[0]}
🔥 PyTorch version: {torch.__version__}
            </pre>
            </div>
            """
            return debug_html

        # Connect events
        analyze_btn.click(
            fn=predict_sentiment_with_details,
            inputs=text_input,
            outputs=[result_output, confidence_plot, predicted_class, status_display],
        )
        clear_btn.click(
            fn=clear_inputs,
            outputs=[text_input, result_output, confidence_plot, predicted_class, status_display],
        )

        # Update status on load
        demo.load(fn=update_model_status, outputs=model_status)
        demo.load(fn=get_debug_info, outputs=debug_info)

    return demo


# Load model and launch interface
if __name__ == "__main__":
    print("🚀 Starting BERT Sentiment Analyzer...")
    print("=" * 60)

    # Load the model
    model_loaded = load_trained_model()

    print("\n" + "=" * 60)
    if model_loaded:
        print("🎉 MODEL LOADING COMPLETED!")
        if os.path.exists('sentiment_pipeline.pkl'):
            print("✅ Fine-tuned model ready for predictions!")
        else:
            print("⚠️ Using base BERT model (fine-tuned model not found)")
    else:
        print("❌ Model loading encountered issues - check logs above")

    print("✅ Creating Gradio interface...")
    demo = create_gradio_interface()

    print("🌐 Launching web interface...")
    print("=" * 60)

    # Launch the interface
    demo.launch(
        server_name="0.0.0.0",  # Allow external access
        server_port=7860,       # Default Gradio port
        share=False,            # Don't create public link (use Space URL)
    )