|
|
import gradio as gr |
|
|
import torch |
|
|
import pickle |
|
|
import pandas as pd |
|
|
import os |
|
|
import io |
|
|
|
|
|
|
|
|
# Unpickled sentiment pipeline dict ('model', 'tokenizer', 'training_config',
# 'best_val_accuracy'); populated by load_model(), None until then.
loaded_pipeline = None

# Inference device; this app runs on CPU only.
model_device = 'cpu'
|
|
|
|
|
def load_model():
    """Load the BERT sentiment pipeline from 'sentiment_pipeline.pkl'.

    Unpickles the pipeline with a CPU-remapping unpickler (so models saved
    on a CUDA machine load on CPU-only hosts), moves the model to CPU and
    switches it to eval mode, then stores it in the module-global
    ``loaded_pipeline``.

    Returns:
        bool: True on success, False if the file is missing or loading fails.
    """
    global loaded_pipeline

    print(f"π₯οΈ Using device: {model_device}")

    try:
        model_file = 'sentiment_pipeline.pkl'

        if not os.path.exists(model_file):
            print(f"β Model file not found: {model_file}")
            return False

        print(f"π¦ Loading BERT model from {model_file}...")

        # Redirect torch storage deserialization through torch.load with
        # map_location='cpu' so CUDA tensors inside the pickle land on CPU.
        class CPUUnpickler(pickle.Unpickler):
            def find_class(self, module, name):
                if module == 'torch.storage' and name == '_load_from_bytes':
                    return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
                return super().find_class(module, name)

        # SECURITY NOTE: unpickling executes arbitrary code — only load a
        # sentiment_pipeline.pkl you produced yourself / trust fully.
        with open(model_file, 'rb') as f:
            # BUG FIX: the original called pickle.load(f), which ignored the
            # CPUUnpickler defined above; GPU-saved pickles would then fail on
            # CPU-only machines. Route loading through the custom unpickler.
            loaded_pipeline = CPUUnpickler(f).load()

        if 'model' in loaded_pipeline:
            loaded_pipeline['model'] = loaded_pipeline['model'].to('cpu')
            loaded_pipeline['model'].eval()

        # BUG FIX: this message was an unterminated string literal split
        # across two physical lines in the original (SyntaxError).
        print("β Successfully loaded BERT model")

        if 'best_val_accuracy' in loaded_pipeline:
            print(f"π― Validation Accuracy: {loaded_pipeline['best_val_accuracy']:.4f}")

        return True

    except Exception as e:
        print(f"β Loading failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
|
def predict_sentiment(text):
    """Classify *text* with the loaded BERT pipeline.

    Returns a dict with 'sentiment' ('negative'/'neutral'/'positive'),
    'confidence' (top-class probability) and 'scores' (all three class
    probabilities). On failure the dict additionally carries an 'error'
    key and the numeric fields are zeroed.
    """
    if loaded_pipeline is None:
        return {
            'sentiment': 'error',
            'confidence': 0.0,
            'scores': {'negative': 0.0, 'neutral': 0.0, 'positive': 0.0},
            'error': 'Model not loaded',
        }

    try:
        model = loaded_pipeline['model']
        tokenizer = loaded_pipeline['tokenizer']
        max_length = loaded_pipeline.get('training_config', {}).get('max_length', 128)

        # Tokenize and keep every tensor on CPU.
        encoded = tokenizer(
            text,
            return_tensors='pt',
            truncation=True,
            padding=True,
            max_length=max_length,
        )
        encoded = {key: tensor.to('cpu') for key, tensor in encoded.items()}

        model.eval()
        with torch.no_grad():
            logits = model(**encoded).logits
            probs = torch.softmax(logits, dim=1)
            label_idx = torch.argmax(probs, dim=1).item()
            top_prob = probs.max().item()

        labels = ['negative', 'neutral', 'positive']
        return {
            'sentiment': labels[label_idx],
            'confidence': top_prob,
            'scores': {
                label: float(probs[0][i].item())
                for i, label in enumerate(labels)
            },
        }

    except Exception as e:
        print(f"Prediction error: {e}")
        return {
            'sentiment': 'error',
            'confidence': 0.0,
            'scores': {'negative': 0.0, 'neutral': 0.0, 'positive': 0.0},
            'error': str(e),
        }
|
|
|
|
|
def analyze_sentiment(text):
    """Analyze sentiment of *text* and return UI-ready outputs.

    Returns a 4-tuple matching the Gradio outputs: (markdown result message,
    DataFrame for the confidence bar chart, predicted class label, short
    status string). Errors and empty input are reported in the same shape.
    """
    if loaded_pipeline is None:
        return (
            "β **Model not loaded!** Please upload sentiment_pipeline.pkl",
            pd.DataFrame(),
            "Error",
            "Model not available"
        )

    if not text or not text.strip():
        return (
            "β οΈ **Please enter text**",
            pd.DataFrame(),
            "No input",
            "Enter text above"
        )

    try:
        result = predict_sentiment(text.strip())

        # predict_sentiment only includes 'error' on failure.
        if 'error' in result:
            return (
                f"β **Error:** {result['error']}",
                pd.DataFrame(),
                "Error",
                f"Error: {result['error']}"
            )

        sentiment = result['sentiment']
        confidence = result['confidence']
        scores = result['scores']

        # Data backing the confidence BarPlot.
        chart_data = pd.DataFrame({
            'Sentiment': ['Negative', 'Neutral', 'Positive'],
            'Confidence': [scores['negative'], scores['neutral'], scores['positive']]
        })

        emoji = {'negative': 'π', 'neutral': 'π', 'positive': 'π'}[sentiment]

        message = f"""
### {emoji} **{sentiment.title()}** Sentiment

**Confidence:** {confidence:.1%}

**Text:** *"{text[:100]}{'...' if len(text) > 100 else ''}"*

**Scores:**
- π Negative: {scores['negative']:.1%}
- π Neutral: {scores['neutral']:.1%}
- π Positive: {scores['positive']:.1%}

β
Bias-corrected BERT model
"""

        # BUG FIX: the status f-string below was split across two physical
        # lines in the original (an unterminated string literal, i.e. a
        # SyntaxError); it is joined into a single line here.
        return message, chart_data, sentiment.title(), f"β {sentiment.title()} ({confidence:.1%})"

    except Exception as e:
        return (
            f"β **Error:** {str(e)}",
            pd.DataFrame(),
            "Error",
            f"Error: {str(e)}"
        )
|
|
|
|
|
|
|
|
# --- Gradio UI definition -------------------------------------------------
# Builds the interface; component creation order inside the context manager
# determines layout, so the structure below must not be reordered.
with gr.Blocks(title="BERT Sentiment Analyzer", theme=gr.themes.Soft()) as demo:

    # Gradient header banner.
    gr.HTML("""
    <div style="text-align: center; padding: 2rem; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 2rem;">
    <h1>π€ BERT Sentiment Analyzer</h1>
    <p style="font-size: 1.2em;">Bias-Corrected Sentiment Classification</p>
    <p>β
Trained with balanced data β’ No negative bias</p>
    </div>
    """)

    # Model-loaded status banner; filled by update_status() on page load.
    model_status = gr.HTML()

    with gr.Row():
        # Left column: text input and action buttons.
        with gr.Column(scale=3):
            gr.Markdown("### π Input Text")

            text_input = gr.Textbox(
                label="Enter text to analyze",
                placeholder="Example: 'This product is amazing! Great quality and excellent service.'",
                lines=6
            )

            with gr.Row():
                analyze_btn = gr.Button("π Analyze", variant="primary", size="lg")
                clear_btn = gr.Button("ποΈ Clear", size="sm")

        # Right column: formatted result, bar chart, and status fields.
        with gr.Column(scale=2):
            gr.Markdown("### οΏ½ Results")

            result_output = gr.Markdown("*Enter text to see results*")

            confidence_plot = gr.BarPlot(
                x="Sentiment",
                y="Confidence",
                title="Confidence Scores",
                width=500,
                height=300
            )

            predicted_class = gr.Textbox(label="Prediction", interactive=False)
            status_display = gr.Textbox(label="Status", interactive=False, value="Ready")

    # Clickable example inputs covering positive / negative / neutral cases.
    with gr.Row():
        gr.Examples(
            examples=[
                ["This product is absolutely amazing! Best purchase ever!"],
                ["I love this so much! Outstanding quality!"],
                ["Excellent customer service and fast delivery!"],
                ["This is terrible! Worst product ever!"],
                ["Completely disappointed. Poor quality."],
                ["Awful experience. Would never buy again!"],
                ["The product is okay. Nothing special but works."],
                ["It's decent. Good value but could be better."],
                ["This is not bad at all"],
                ["Pretty good"],
            ],
            inputs=text_input,
            outputs=[result_output, confidence_plot, predicted_class, status_display],
            fn=analyze_sentiment,
            cache_examples=False
        )

    # Static model information panel.
    with gr.Accordion("βΉοΈ Model Info", open=False):
        gr.Markdown("""
        ### π§ Model
        - **Architecture:** BERT (bert-base-uncased)
        - **Classes:** Negative π, Neutral π, Positive π
        - **Training:** Balanced dataset with class weights

        ### π§ Features
        - β
No negative bias
        - β
Balanced training data
        - β
Class-weighted loss
        - β
CPU optimized

        ### π Configuration
        - Epochs: 4
        - Learning Rate: 1e-5
        - Batch Size: 16
        - Max Length: 128 tokens
        """)

    def clear_all():
        """Reset every UI component to its initial state."""
        return "", "*Enter text*", pd.DataFrame(), "", "Ready"

    def update_status():
        """Return an HTML banner: green if the model is loaded, red otherwise."""
        if loaded_pipeline:
            val_acc = loaded_pipeline.get('best_val_accuracy', 'N/A')
            return f"""<div style="padding: 1rem; background: #d4edda; color: #155724; border-radius: 8px; text-align: center;">
β
Model Loaded | Accuracy: {val_acc if isinstance(val_acc, str) else f'{val_acc:.2%}'}</div>"""
        return """<div style="padding: 1rem; background: #f8d7da; color: #721c24; border-radius: 8px; text-align: center;">
β Model Not Loaded</div>"""

    # Wire the buttons and the page-load status refresh.
    analyze_btn.click(
        fn=analyze_sentiment,
        inputs=text_input,
        outputs=[result_output, confidence_plot, predicted_class, status_display]
    )

    clear_btn.click(
        fn=clear_all,
        outputs=[text_input, result_output, confidence_plot, predicted_class, status_display]
    )

    demo.load(fn=update_status, outputs=model_status)
|
if __name__ == "__main__":
    # Entry point: load the model first, launch the UI only on success.
    print("π Starting BERT Sentiment Analyzer...")
    print("=" * 60)

    if load_model():
        # BUG FIX: this success message was an unterminated string literal
        # split across two physical lines in the original (SyntaxError).
        print("\nβ MODEL READY!")
        print("π Launching interface...")
        demo.launch()
    else:
        print("\nβ FAILED TO LOAD MODEL!")
        print("π Ensure sentiment_pipeline.pkl exists")
|
|