import gradio as gr import torch import pickle import pandas as pd from transformers import BertTokenizer, BertForSequenceClassification import numpy as np import os import io import traceback # Global variables for model components loaded_model = None loaded_tokenizer = None model_device = torch.device('cpu') # Always use CPU for Hugging Face Spaces def safe_pickle_load(file_path): """Safely load pickle file with CPU mapping for all torch tensors""" class CPUUnpickler(pickle.Unpickler): def find_class(self, module, name): if module == 'torch.storage' and name == '_load_from_bytes': return lambda b: torch.load(io.BytesIO(b), map_location='cpu') else: return super().find_class(module, name) with open(file_path, 'rb') as f: return CPUUnpickler(f).load() def load_trained_model(): """Load the trained BERT model with comprehensive CPU compatibility""" global loaded_model, loaded_tokenizer print("๐ Starting model loading process...") print(f"๐ฅ๏ธ Target device: {model_device}") print(f"๐ Current directory: {os.getcwd()}") print(f"๐ Files in directory: {os.listdir('.')}") loading_success = False try: # Method 1: Try loading from pickle with advanced CPU mapping if os.path.exists('sentiment_pipeline.pkl'): print("\n๐ฆ Method 1: Loading from sentiment_pipeline.pkl...") try: # First try the safe pickle loader pipeline = safe_pickle_load('sentiment_pipeline.pkl') loaded_model = pipeline['model'] loaded_tokenizer = pipeline['tokenizer'] # Force everything to CPU loaded_model = loaded_model.to('cpu') loaded_model.eval() print("โ Successfully loaded from pickle with safe CPU mapping!") loading_success = True except Exception as e: print(f"โ Safe pickle loading failed: {e}") # Fallback: Try with torch.load override try: print("๐ Trying torch.load override method...") # Override torch.load temporarily original_torch_load = torch.load torch.load = lambda *args, **kwargs: original_torch_load(*args, **{**kwargs, 'map_location': 'cpu'}) with open('sentiment_pipeline.pkl', 'rb') as f: pipeline = pickle.load(f) loaded_model = pipeline['model'] loaded_tokenizer = pipeline['tokenizer'] # Restore original torch.load torch.load = original_torch_load # Ensure CPU loaded_model = loaded_model.to('cpu') loaded_model.eval() print("โ Successfully loaded with torch.load override!") loading_success = True except Exception as e2: print(f"โ Torch.load override also failed: {e2}") # Method 2: Try loading from HuggingFace format if not loading_success and os.path.exists('bert_sentiment_model'): print("\n๐ค Method 2: Loading from HuggingFace format...") try: loaded_model = BertForSequenceClassification.from_pretrained('bert_sentiment_model') loaded_tokenizer = BertTokenizer.from_pretrained('bert_sentiment_model') loaded_model = loaded_model.to('cpu') loaded_model.eval() print("โ Successfully loaded from HuggingFace format!") loading_success = True except Exception as e: print(f"โ HuggingFace format loading failed: {e}") # Method 3: Try loading individual model files if not loading_success: for potential_path in ['./model', './trained_model', './fine_tuned_model']: if os.path.exists(potential_path): print(f"\n๐ Method 3: Trying to load from {potential_path}...") try: loaded_model = BertForSequenceClassification.from_pretrained(potential_path) loaded_tokenizer = BertTokenizer.from_pretrained(potential_path) loaded_model = loaded_model.to('cpu') loaded_model.eval() print(f"โ Successfully loaded from {potential_path}!") loading_success = True break except Exception as e: print(f"โ Loading from {potential_path} failed: {e}") # Method 4: Load base BERT model as fallback if not loading_success: print("\nโ ๏ธ Method 4: Loading base BERT model as fallback...") try: loaded_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3) loaded_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') loaded_model = loaded_model.to('cpu') loaded_model.eval() print("โ Base BERT model loaded successfully!") print("โ ๏ธ NOTE: This is NOT your fine-tuned model!") loading_success = True except Exception as e: print(f"โ Even base BERT loading failed: {e}") return False # Verify model is working if loaded_model is not None and loaded_tokenizer is not None: print("\n๐งช Testing model functionality...") try: test_input = "This is a test sentence" inputs = loaded_tokenizer(test_input, return_tensors='pt', truncation=True, padding=True, max_length=128) with torch.no_grad(): outputs = loaded_model(**inputs) probabilities = torch.softmax(outputs.logits, dim=1) print("โ Model test prediction successful!") print(f"๐ Model parameters: {sum(p.numel() for p in loaded_model.parameters()):,}") print(f"๐ฏ Test output shape: {outputs.logits.shape}") print(f"๐ Test probabilities: {probabilities.squeeze().tolist()}") return True except Exception as e: print(f"โ Model test failed: {e}") print(f"๐ Full error: {traceback.format_exc()}") return False else: print("โ Model or tokenizer is None after loading") return False except Exception as e: print(f"โ Critical error in model loading: {e}") print(f"๐ Full traceback: {traceback.format_exc()}") return False def predict_sentiment_with_details(text): """Predict sentiment with detailed output and error handling""" # Check if model is loaded if loaded_model is None or loaded_tokenizer is None: return ( "โ **ERROR: Model not loaded!**\n\nThe fine-tuned model could not be loaded. Using base BERT instead.\n\n**Possible causes:**\n- Model file not uploaded to Hugging Face Space\n- Device compatibility issues\n- File corruption\n\n**Solutions:**\n- Make sure `sentiment_pipeline.pkl` is uploaded\n- Check Hugging Face Space logs\n- Try re-uploading the model file", pd.DataFrame(), "Error: No model", "Model not available - check upload" ) # Check if text is provided if not text or not text.strip(): return ( "โ ๏ธ **Please enter some text to analyze**", pd.DataFrame(), "No input", "Enter text above" ) try: # Clean and prepare text clean_text = text.strip() print(f"๐ Analyzing: {clean_text[:50]}{'...' if len(clean_text) > 50 else ''}") # Tokenize input (ensure CPU) inputs = loaded_tokenizer( clean_text, return_tensors='pt', truncation=True, padding=True, max_length=128 ) # Move inputs to CPU explicitly inputs = {k: v.to('cpu') for k, v in inputs.items()} # Get prediction (all on CPU) with torch.no_grad(): outputs = loaded_model(**inputs) probabilities = torch.softmax(outputs.logits, dim=1) prediction = torch.argmax(probabilities, dim=1).item() confidence = probabilities.max().item() # Map labels label_mapping = {0: 'Negative', 1: 'Neutral', 2: 'Positive'} predicted_sentiment = label_mapping[prediction] # Create confidence scores for visualization confidence_data = pd.DataFrame({ 'Sentiment': ['Negative', 'Neutral', 'Positive'], 'Confidence': [ float(probabilities[0][0].item()), float(probabilities[0][1].item()), float(probabilities[0][2].item()) ] }) # Create detailed result message emoji_map = {'Negative': '๐', 'Neutral': '๐', 'Positive': '๐'} emoji = emoji_map[predicted_sentiment] # Check if this is the fine-tuned model or base model model_type = "Fine-tuned BERT" if os.path.exists('sentiment_pipeline.pkl') else "Base BERT (not fine-tuned)" result_message = f""" ### {emoji} **{predicted_sentiment}** Sentiment Detected **Confidence Score:** {confidence:.1%} **Input Text:** *"{clean_text[:100]}{'...' if len(clean_text) > 100 else ''}"* **Analysis Details:** - **Negative:** {probabilities[0][0].item():.1%} - **Neutral:** {probabilities[0][1].item():.1%} - **Positive:** {probabilities[0][2].item():.1%} **Model Type:** {model_type} **Status:** โ Prediction completed successfully """ status_message = f"โ Analysis complete - {predicted_sentiment} sentiment ({confidence:.1%} confidence)" return result_message, confidence_data, predicted_sentiment, status_message except Exception as e: error_msg = f"โ **Prediction Error:** {str(e)}\n\nDetailed error information:\n```\n{traceback.format_exc()}\n```" print(f"Prediction error: {e}") print(f"Full traceback: {traceback.format_exc()}") return error_msg, pd.DataFrame(), "Error", f"Error: {str(e)}" def create_gradio_interface(): """Create enhanced Gradio interface with comprehensive model status""" # Custom CSS for better styling css = """ .model-status { padding: 1rem; border-radius: 8px; margin-bottom: 1rem; text-align: center; font-weight: bold; } .status-success { background-color: #d4edda; color: #155724; border: 1px solid #c3e6cb; } .status-warning { background-color: #fff3cd; color: #856404; border: 1px solid #ffeaa7; } .status-error { background-color: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; } """ with gr.Blocks(css=css, title="BERT Sentiment Analyzer", theme=gr.themes.Soft()) as demo: # Header gr.HTML("""
Advanced AI-powered sentiment analysis using BERT
๐ Permanently hosted on Hugging Face Spaces