# Hugging Face Spaces status banner (page-scrape residue): Sleeping
| import gradio as gr | |
| import torch | |
| import pickle | |
| import pandas as pd | |
| from transformers import BertTokenizer, BertForSequenceClassification | |
| import numpy as np | |
| import os | |
| import io | |
| import traceback | |
| # Global variables for model components | |
| loaded_model = None | |
| loaded_tokenizer = None | |
| model_device = torch.device('cpu') # Always use CPU for Hugging Face Spaces | |
| def safe_pickle_load(file_path): | |
| """Safely load pickle file with CPU mapping for all torch tensors""" | |
| class CPUUnpickler(pickle.Unpickler): | |
| def find_class(self, module, name): | |
| if module == 'torch.storage' and name == '_load_from_bytes': | |
| return lambda b: torch.load(io.BytesIO(b), map_location='cpu') | |
| else: | |
| return super().find_class(module, name) | |
| with open(file_path, 'rb') as f: | |
| return CPUUnpickler(f).load() | |
| def load_trained_model(): | |
| """Load the trained BERT model with comprehensive CPU compatibility""" | |
| global loaded_model, loaded_tokenizer | |
| print("π Starting model loading process...") | |
| print(f"π₯οΈ Target device: {model_device}") | |
| print(f"π Current directory: {os.getcwd()}") | |
| print(f"π Files in directory: {os.listdir('.')}") | |
| loading_success = False | |
| try: | |
| # Method 1: Try loading from pickle with advanced CPU mapping | |
| if os.path.exists('sentiment_pipeline.pkl'): | |
| print("\nπ¦ Method 1: Loading from sentiment_pipeline.pkl...") | |
| try: | |
| # First try the safe pickle loader | |
| pipeline = safe_pickle_load('sentiment_pipeline.pkl') | |
| loaded_model = pipeline['model'] | |
| loaded_tokenizer = pipeline['tokenizer'] | |
| # Force everything to CPU | |
| loaded_model = loaded_model.to('cpu') | |
| loaded_model.eval() | |
| print("β Successfully loaded from pickle with safe CPU mapping!") | |
| loading_success = True | |
| except Exception as e: | |
| print(f"β Safe pickle loading failed: {e}") | |
| # Fallback: Try with torch.load override | |
| try: | |
| print("π Trying torch.load override method...") | |
| # Override torch.load temporarily | |
| original_torch_load = torch.load | |
| torch.load = lambda *args, **kwargs: original_torch_load(*args, **{**kwargs, 'map_location': 'cpu'}) | |
| with open('sentiment_pipeline.pkl', 'rb') as f: | |
| pipeline = pickle.load(f) | |
| loaded_model = pipeline['model'] | |
| loaded_tokenizer = pipeline['tokenizer'] | |
| # Restore original torch.load | |
| torch.load = original_torch_load | |
| # Ensure CPU | |
| loaded_model = loaded_model.to('cpu') | |
| loaded_model.eval() | |
| print("β Successfully loaded with torch.load override!") | |
| loading_success = True | |
| except Exception as e2: | |
| print(f"β Torch.load override also failed: {e2}") | |
| # Method 2: Try loading from HuggingFace format | |
| if not loading_success and os.path.exists('bert_sentiment_model'): | |
| print("\nπ€ Method 2: Loading from HuggingFace format...") | |
| try: | |
| loaded_model = BertForSequenceClassification.from_pretrained('bert_sentiment_model') | |
| loaded_tokenizer = BertTokenizer.from_pretrained('bert_sentiment_model') | |
| loaded_model = loaded_model.to('cpu') | |
| loaded_model.eval() | |
| print("β Successfully loaded from HuggingFace format!") | |
| loading_success = True | |
| except Exception as e: | |
| print(f"β HuggingFace format loading failed: {e}") | |
| # Method 3: Try loading individual model files | |
| if not loading_success: | |
| for potential_path in ['./model', './trained_model', './fine_tuned_model']: | |
| if os.path.exists(potential_path): | |
| print(f"\nπ Method 3: Trying to load from {potential_path}...") | |
| try: | |
| loaded_model = BertForSequenceClassification.from_pretrained(potential_path) | |
| loaded_tokenizer = BertTokenizer.from_pretrained(potential_path) | |
| loaded_model = loaded_model.to('cpu') | |
| loaded_model.eval() | |
| print(f"β Successfully loaded from {potential_path}!") | |
| loading_success = True | |
| break | |
| except Exception as e: | |
| print(f"β Loading from {potential_path} failed: {e}") | |
| # Method 4: Load base BERT model as fallback | |
| if not loading_success: | |
| print("\nβ οΈ Method 4: Loading base BERT model as fallback...") | |
| try: | |
| loaded_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3) | |
| loaded_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') | |
| loaded_model = loaded_model.to('cpu') | |
| loaded_model.eval() | |
| print("β Base BERT model loaded successfully!") | |
| print("β οΈ NOTE: This is NOT your fine-tuned model!") | |
| loading_success = True | |
| except Exception as e: | |
| print(f"β Even base BERT loading failed: {e}") | |
| return False | |
| # Verify model is working | |
| if loaded_model is not None and loaded_tokenizer is not None: | |
| print("\nπ§ͺ Testing model functionality...") | |
| try: | |
| test_input = "This is a test sentence" | |
| inputs = loaded_tokenizer(test_input, return_tensors='pt', truncation=True, padding=True, max_length=128) | |
| with torch.no_grad(): | |
| outputs = loaded_model(**inputs) | |
| probabilities = torch.softmax(outputs.logits, dim=1) | |
| print("β Model test prediction successful!") | |
| print(f"π Model parameters: {sum(p.numel() for p in loaded_model.parameters()):,}") | |
| print(f"π― Test output shape: {outputs.logits.shape}") | |
| print(f"π Test probabilities: {probabilities.squeeze().tolist()}") | |
| return True | |
| except Exception as e: | |
| print(f"β Model test failed: {e}") | |
| print(f"π Full error: {traceback.format_exc()}") | |
| return False | |
| else: | |
| print("β Model or tokenizer is None after loading") | |
| return False | |
| except Exception as e: | |
| print(f"β Critical error in model loading: {e}") | |
| print(f"π Full traceback: {traceback.format_exc()}") | |
| return False | |
| def predict_sentiment_with_details(text): | |
| """Predict sentiment with detailed output and error handling""" | |
| # Check if model is loaded | |
| if loaded_model is None or loaded_tokenizer is None: | |
| return ( | |
| "β **ERROR: Model not loaded!**\n\nThe fine-tuned model could not be loaded. Using base BERT instead.\n\n**Possible causes:**\n- Model file not uploaded to Hugging Face Space\n- Device compatibility issues\n- File corruption\n\n**Solutions:**\n- Make sure `sentiment_pipeline.pkl` is uploaded\n- Check Hugging Face Space logs\n- Try re-uploading the model file", | |
| pd.DataFrame(), | |
| "Error: No model", | |
| "Model not available - check upload" | |
| ) | |
| # Check if text is provided | |
| if not text or not text.strip(): | |
| return ( | |
| "β οΈ **Please enter some text to analyze**", | |
| pd.DataFrame(), | |
| "No input", | |
| "Enter text above" | |
| ) | |
| try: | |
| # Clean and prepare text | |
| clean_text = text.strip() | |
| print(f"π Analyzing: {clean_text[:50]}{'...' if len(clean_text) > 50 else ''}") | |
| # Tokenize input (ensure CPU) | |
| inputs = loaded_tokenizer( | |
| clean_text, | |
| return_tensors='pt', | |
| truncation=True, | |
| padding=True, | |
| max_length=128 | |
| ) | |
| # Move inputs to CPU explicitly | |
| inputs = {k: v.to('cpu') for k, v in inputs.items()} | |
| # Get prediction (all on CPU) | |
| with torch.no_grad(): | |
| outputs = loaded_model(**inputs) | |
| probabilities = torch.softmax(outputs.logits, dim=1) | |
| prediction = torch.argmax(probabilities, dim=1).item() | |
| confidence = probabilities.max().item() | |
| # Map labels | |
| label_mapping = {0: 'Negative', 1: 'Neutral', 2: 'Positive'} | |
| predicted_sentiment = label_mapping[prediction] | |
| # Create confidence scores for visualization | |
| confidence_data = pd.DataFrame({ | |
| 'Sentiment': ['Negative', 'Neutral', 'Positive'], | |
| 'Confidence': [ | |
| float(probabilities[0][0].item()), | |
| float(probabilities[0][1].item()), | |
| float(probabilities[0][2].item()) | |
| ] | |
| }) | |
| # Create detailed result message | |
| emoji_map = {'Negative': 'π', 'Neutral': 'π', 'Positive': 'π'} | |
| emoji = emoji_map[predicted_sentiment] | |
| # Check if this is the fine-tuned model or base model | |
| model_type = "Fine-tuned BERT" if os.path.exists('sentiment_pipeline.pkl') else "Base BERT (not fine-tuned)" | |
| result_message = f""" | |
| ### {emoji} **{predicted_sentiment}** Sentiment Detected | |
| **Confidence Score:** {confidence:.1%} | |
| **Input Text:** *"{clean_text[:100]}{'...' if len(clean_text) > 100 else ''}"* | |
| **Analysis Details:** | |
| - **Negative:** {probabilities[0][0].item():.1%} | |
| - **Neutral:** {probabilities[0][1].item():.1%} | |
| - **Positive:** {probabilities[0][2].item():.1%} | |
| **Model Type:** {model_type} | |
| **Status:** β Prediction completed successfully | |
| """ | |
| status_message = f"β Analysis complete - {predicted_sentiment} sentiment ({confidence:.1%} confidence)" | |
| return result_message, confidence_data, predicted_sentiment, status_message | |
| except Exception as e: | |
| error_msg = f"β **Prediction Error:** {str(e)}\n\nDetailed error information:\n```\n{traceback.format_exc()}\n```" | |
| print(f"Prediction error: {e}") | |
| print(f"Full traceback: {traceback.format_exc()}") | |
| return error_msg, pd.DataFrame(), "Error", f"Error: {str(e)}" | |
| def create_gradio_interface(): | |
| """Create enhanced Gradio interface with comprehensive model status""" | |
| # Custom CSS for better styling | |
| css = """ | |
| .model-status { | |
| padding: 1rem; | |
| border-radius: 8px; | |
| margin-bottom: 1rem; | |
| text-align: center; | |
| font-weight: bold; | |
| } | |
| .status-success { | |
| background-color: #d4edda; | |
| color: #155724; | |
| border: 1px solid #c3e6cb; | |
| } | |
| .status-warning { | |
| background-color: #fff3cd; | |
| color: #856404; | |
| border: 1px solid #ffeaa7; | |
| } | |
| .status-error { | |
| background-color: #f8d7da; | |
| color: #721c24; | |
| border: 1px solid #f5c6cb; | |
| } | |
| """ | |
| with gr.Blocks(css=css, title="BERT Sentiment Analyzer", theme=gr.themes.Soft()) as demo: | |
| # Header | |
| gr.HTML(""" | |
| <div style="text-align: center; padding: 2rem; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 2rem;"> | |
| <h1>π€ BERT Sentiment Classification</h1> | |
| <p>Advanced AI-powered sentiment analysis using BERT</p> | |
| <p><strong>π Permanently hosted on Hugging Face Spaces</strong></p> | |
| </div> | |
| """) | |
| # Model status indicator | |
| model_status = gr.HTML() | |
| # Debug information (collapsible) | |
| with gr.Accordion("π§ Debug Information", open=False): | |
| debug_info = gr.HTML() | |
| with gr.Row(): | |
| with gr.Column(scale=3): | |
| gr.Markdown("### π Enter Text for Sentiment Analysis") | |
| text_input = gr.Textbox( | |
| label="Input Text", | |
| placeholder="Enter your text here... (e.g., 'This product is amazing! Great quality and fast delivery.')", | |
| lines=6, | |
| max_lines=20, | |
| value="" | |
| ) | |
| with gr.Row(): | |
| analyze_btn = gr.Button("π Analyze Sentiment", variant="primary", size="lg") | |
| clear_btn = gr.Button("ποΈ Clear", size="sm") | |
| gr.Markdown("### π‘ Example Texts to Try:") | |
| examples = gr.Examples( | |
| examples=[ | |
| ["This product exceeded all my expectations! Outstanding quality and excellent customer service."], | |
| ["I'm completely disappointed with this purchase. Poor quality and terrible customer support."], | |
| ["The product is decent. It works as described but nothing extraordinary."], | |
| ["Best purchase I've made this year! Highly recommend to everyone."], | |
| ["Absolutely horrible experience. Would never buy from this company again."], | |
| ["It's okay, good value for the price but could be improved."] | |
| ], | |
| inputs=text_input, | |
| label=None | |
| ) | |
| with gr.Column(scale=2): | |
| gr.Markdown("### π Analysis Results") | |
| result_output = gr.Markdown( | |
| value="*Enter text and click 'Analyze Sentiment' to see results*" | |
| ) | |
| confidence_plot = gr.BarPlot( | |
| x="Sentiment", | |
| y="Confidence", | |
| title="Confidence Scores by Sentiment Class", | |
| x_title="Sentiment", | |
| y_title="Confidence Score", | |
| width=500, | |
| height=300, | |
| container=True | |
| ) | |
| predicted_class = gr.Textbox( | |
| label="Predicted Sentiment Class", | |
| interactive=False, | |
| value="" | |
| ) | |
| status_display = gr.Textbox( | |
| label="Analysis Status", | |
| interactive=False, | |
| value="Ready for analysis" | |
| ) | |
| # Model Information Section | |
| with gr.Accordion("π Model Information & Setup Guide", open=False): | |
| gr.Markdown(f""" | |
| ### π§ Model Architecture | |
| - **Base Model:** BERT (bert-base-uncased) | |
| - **Task:** Multi-class sentiment classification | |
| - **Classes:** Negative π, Neutral π, Positive π | |
| - **Max Sequence Length:** 128 tokens | |
| - **Device:** {model_device} | |
| ### π Required Files for Fine-tuned Model | |
| To use your fine-tuned model, make sure these files are uploaded to your Hugging Face Space: | |
| 1. **sentiment_pipeline.pkl** - Your trained model (REQUIRED) | |
| 2. **app.py** - This application file | |
| 3. **requirements.txt** - Python dependencies | |
| 4. **README.md** - Space configuration | |
| ### π Hugging Face Space Setup Instructions | |
| 1. **Create New Space:** | |
| - Go to [Hugging Face Spaces](https://huggingface.co/spaces) | |
| - Click "Create new Space" | |
| - Choose **Gradio** as SDK | |
| - Select **CPU** as hardware (recommended for this model) | |
| 2. **Upload Files:** | |
| - Upload `sentiment_pipeline.pkl` (your trained model) | |
| - Upload `app.py` (this file) | |
| - Upload `requirements.txt` | |
| - Upload `README.md` | |
| 3. **Space Configuration:** | |
| - Make sure your `README.md` has this header: | |
| ```yaml | |
| --- | |
| title: BERT Sentiment Analyzer | |
| emoji: π€ | |
| colorFrom: blue | |
| colorTo: purple | |
| sdk: gradio | |
| sdk_version: 3.40.0 | |
| app_file: app.py | |
| pinned: false | |
| --- | |
| ``` | |
| ### β οΈ Troubleshooting | |
| **If you see "Model Not Loaded":** | |
| - Check if `sentiment_pipeline.pkl` is uploaded | |
| - Verify file size (should be ~400MB+) | |
| - Check Space logs for errors | |
| - Make sure you selected **CPU** hardware | |
| **If predictions seem wrong:** | |
| - The app might be using base BERT instead of your fine-tuned model | |
| - Re-upload `sentiment_pipeline.pkl` | |
| - Check the model status indicator above | |
| """) | |
| # Event handlers | |
| def clear_inputs(): | |
| return "", "*Enter text to see analysis*", pd.DataFrame(), "", "Ready for analysis" | |
| def update_model_status(): | |
| if loaded_model is not None and loaded_tokenizer is not None: | |
| if os.path.exists('sentiment_pipeline.pkl'): | |
| return """<div class="model-status status-success">β Fine-tuned Model Loaded Successfully - Ready for Analysis!</div>""" | |
| else: | |
| return """<div class="model-status status-warning">β οΈ Base BERT Model Loaded - Upload sentiment_pipeline.pkl for fine-tuned predictions</div>""" | |
| else: | |
| return """<div class="model-status status-error">β Model Loading Failed - Check files and logs</div>""" | |
| def get_debug_info(): | |
| debug_html = f""" | |
| <div style="font-family: monospace; background: #f8f9fa; padding: 1rem; border-radius: 5px;"> | |
| <strong>Debug Information:</strong><br> | |
| π Current directory: {os.getcwd()}<br> | |
| π Files present: {', '.join(os.listdir('.'))}<br> | |
| π€ Model loaded: {loaded_model is not None}<br> | |
| π€ Tokenizer loaded: {loaded_tokenizer is not None}<br> | |
| πΎ Pickle file exists: {os.path.exists('sentiment_pipeline.pkl')}<br> | |
| π₯οΈ Device: {model_device}<br> | |
| π Python version: {torch.__version__}<br> | |
| </div> | |
| """ | |
| return debug_html | |
| # Connect events | |
| analyze_btn.click( | |
| fn=predict_sentiment_with_details, | |
| inputs=text_input, | |
| outputs=[result_output, confidence_plot, predicted_class, status_display] | |
| ) | |
| clear_btn.click( | |
| fn=clear_inputs, | |
| outputs=[text_input, result_output, confidence_plot, predicted_class, status_display] | |
| ) | |
| # Update status on load | |
| demo.load( | |
| fn=update_model_status, | |
| outputs=model_status | |
| ) | |
| demo.load( | |
| fn=get_debug_info, | |
| outputs=debug_info | |
| ) | |
| return demo | |
| # Load model and launch interface | |
| if __name__ == "__main__": | |
| print("π Starting BERT Sentiment Analyzer...") | |
| print("=" * 60) | |
| # Load the model | |
| model_loaded = load_trained_model() | |
| print("\n" + "=" * 60) | |
| if model_loaded: | |
| print("π MODEL LOADING COMPLETED!") | |
| if os.path.exists('sentiment_pipeline.pkl'): | |
| print("β Fine-tuned model ready for predictions!") | |
| else: | |
| print("β οΈ Using base BERT model (fine-tuned model not found)") | |
| else: | |
| print("β Model loading encountered issues - check logs above") | |
| print("β Creating Gradio interface...") | |
| demo = create_gradio_interface() | |
| print("π Launching web interface...") | |
| print("=" * 60) | |
| # Launch the interface | |
| demo.launch( | |
| server_name="0.0.0.0", # Allow external access | |
| server_port=7860, # Default Gradio port | |
| share=False # Don't create public link (use Space URL) | |
| ) |