import gradio as gr import torch import pickle import pandas as pd import os import io # Global variables loaded_pipeline = None model_device = 'cpu' # Force CPU for Hugging Face Spaces def load_model(): """Load the BERT sentiment model from pickle""" global loaded_pipeline print(f"š„ļø Using device: {model_device}") try: model_file = 'sentiment_pipeline.pkl' if not os.path.exists(model_file): print(f"ā Model file not found: {model_file}") return False print(f"š¦ Loading BERT model from {model_file}...") # Custom unpickler for CPU compatibility class CPUUnpickler(pickle.Unpickler): def find_class(self, module, name): if module == 'torch.storage' and name == '_load_from_bytes': return lambda b: torch.load(io.BytesIO(b), map_location='cpu') else: return super().find_class(module, name) with open(model_file, 'rb') as f: loaded_pipeline = pickle.load(f) # Move model to CPU if 'model' in loaded_pipeline: loaded_pipeline['model'] = loaded_pipeline['model'].to('cpu') loaded_pipeline['model'].eval() print(f"ā Successfully loaded BERT model") if 'best_val_accuracy' in loaded_pipeline: print(f"šÆ Validation Accuracy: {loaded_pipeline['best_val_accuracy']:.4f}") return True except Exception as e: print(f"ā Loading failed: {e}") import traceback traceback.print_exc() return False def predict_sentiment(text): """Predict sentiment using BERT model""" if loaded_pipeline is None: return { 'sentiment': 'error', 'confidence': 0.0, 'scores': {'negative': 0.0, 'neutral': 0.0, 'positive': 0.0}, 'error': 'Model not loaded' } try: model = loaded_pipeline['model'] tokenizer = loaded_pipeline['tokenizer'] max_length = loaded_pipeline.get('training_config', {}).get('max_length', 128) # Tokenize inputs = tokenizer( text, return_tensors='pt', truncation=True, padding=True, max_length=max_length ) inputs = {k: v.to('cpu') for k, v in inputs.items()} # Predict model.eval() with torch.no_grad(): outputs = model(**inputs) probabilities = torch.softmax(outputs.logits, dim=1) prediction = torch.argmax(probabilities, dim=1).item() confidence = probabilities.max().item() sentiment_names = ['negative', 'neutral', 'positive'] return { 'sentiment': sentiment_names[prediction], 'confidence': confidence, 'scores': { 'negative': float(probabilities[0][0].item()), 'neutral': float(probabilities[0][1].item()), 'positive': float(probabilities[0][2].item()) } } except Exception as e: print(f"Prediction error: {e}") return { 'sentiment': 'error', 'confidence': 0.0, 'scores': {'negative': 0.0, 'neutral': 0.0, 'positive': 0.0}, 'error': str(e) } def analyze_sentiment(text): """Analyze sentiment and return formatted results""" if loaded_pipeline is None: return ( "ā **Model not loaded!** Please upload sentiment_pipeline.pkl", pd.DataFrame(), "Error", "Model not available" ) if not text or not text.strip(): return ( "ā ļø **Please enter text**", pd.DataFrame(), "No input", "Enter text above" ) try: result = predict_sentiment(text.strip()) if 'error' in result: return ( f"ā **Error:** {result['error']}", pd.DataFrame(), "Error", f"Error: {result['error']}" ) sentiment = result['sentiment'] confidence = result['confidence'] scores = result['scores'] # Create DataFrame for chart chart_data = pd.DataFrame({ 'Sentiment': ['Negative', 'Neutral', 'Positive'], 'Confidence': [scores['negative'], scores['neutral'], scores['positive']] }) # Emoji mapping emoji = {'negative': 'š', 'neutral': 'š', 'positive': 'š'}[sentiment] # Result message message = f""" ### {emoji} **{sentiment.title()}** Sentiment **Confidence:** {confidence:.1%} **Text:** *"{text[:100]}{'...' if len(text) > 100 else ''}"* **Scores:** - š Negative: {scores['negative']:.1%} - š Neutral: {scores['neutral']:.1%} - š Positive: {scores['positive']:.1%} ā Bias-corrected BERT model """ return message, chart_data, sentiment.title(), f"ā {sentiment.title()} ({confidence:.1%})" except Exception as e: return ( f"ā **Error:** {str(e)}", pd.DataFrame(), "Error", f"Error: {str(e)}" ) # Create Gradio interface with gr.Blocks(title="BERT Sentiment Analyzer", theme=gr.themes.Soft()) as demo: gr.HTML("""
Bias-Corrected Sentiment Classification
ā Trained with balanced data ⢠No negative bias