|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from transformers import pipeline |
|
|
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer |
|
|
import plotly.express as px |
|
|
from wordcloud import WordCloud |
|
|
import matplotlib.pyplot as plt |
|
|
from collections import Counter |
|
|
import re |
|
|
from datetime import datetime |
|
|
import time |
|
|
|
|
|
|
|
|
# --- Page configuration: must be the first Streamlit call in the script ---
st.set_page_config(page_title="Sentiment Analytics Pro", page_icon="๐", layout="wide")

# Inject custom CSS: card-style metric boxes and a larger text-area font.
st.markdown("""
<style>
.metric-box {
    background-color: #f0f2f6;
    border-left: 5px solid #4B4B4B;
    padding: 20px;
    border-radius: 10px;
    margin-bottom: 10px;
}
.stTextArea textarea {
    font-size: 16px;
}
</style>
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_models():
    """Load and cache every sentiment model used by the app.

    Returns:
        tuple: ``(roberta, distilbert, vader, multilingual)``. Transformer
        entries are ``None`` when their download/initialization failed;
        VADER is pure Python and is always returned.
    """
    try:
        st.info("๐ Loading AI models... This may take a few minutes on first run.")

        # Twitter-tuned RoBERTa: 3-way negative/neutral/positive classifier.
        roberta = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest"
        )

        # DistilBERT fine-tuned on SST-2: binary POSITIVE/NEGATIVE.
        distilbert = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english"
        )

        # Rule-based VADER: no network download, effectively cannot fail.
        vader = SentimentIntensityAnalyzer()

        # Multilingual BERT (nlptown): predicts 1-5 star ratings.
        multilingual = pipeline(
            "sentiment-analysis",
            model="nlptown/bert-base-multilingual-uncased-sentiment"
        )

        st.success("โ All models loaded successfully!")
        return roberta, distilbert, vader, multilingual

    except Exception as e:
        st.error(f"โ Error loading models: {str(e)}")

        # Degraded mode: VADER plus the default sentiment pipeline.
        try:
            vader = SentimentIntensityAnalyzer()
            distilbert = pipeline("sentiment-analysis")
            return None, distilbert, vader, None
        except Exception:
            # Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
            # are not swallowed. Last resort: VADER only.
            return None, None, SentimentIntensityAnalyzer(), None
|
|
|
|
|
|
|
|
# Eagerly initialize all models at script start; @st.cache_resource makes
# this a one-time cost shared across Streamlit reruns and sessions.
with st.spinner("Initializing AI models..."):
    roberta_model, distilbert_model, vader_model, multi_model = load_models()

# VADER is the minimum viable analyzer — if even it is missing, the app
# cannot produce any result, so halt the script here.
if vader_model is None:
    st.error("โ Critical error: Failed to load essential models. Please refresh the page.")
    st.stop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def clean_text(text):
    """Normalize text for word-cloud rendering.

    Lowercases the input, strips URLs, then removes everything that is
    not a word character or whitespace.
    """
    lowered = text.lower()
    without_urls = re.sub(r'http\S+', '', lowered)
    return re.sub(r'[^\w\s]', '', without_urls)
|
|
|
|
|
def get_wordcloud(text):
    """Build a word-cloud figure for *text*.

    Returns:
        matplotlib Figure on success, or ``None`` (after surfacing a
        Streamlit error) when generation fails — e.g. no usable words.
    """
    try:
        cloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
            max_words=100,
            colormap='viridis',
        ).generate(text)
        figure, axis = plt.subplots(figsize=(10, 5))
        axis.imshow(cloud, interpolation='bilinear')
        axis.axis('off')
        return figure
    except Exception as e:
        st.error(f"WordCloud error: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def analyze_english(text):
    """Ensemble sentiment analysis for English text.

    Runs up to three models (RoBERTa, VADER, DistilBERT) and combines
    them by majority vote.

    Returns:
        dict with keys 'verdict' (positive/negative/neutral/ambiguous),
        'confidence' (human-readable string), 'breakdown' (model -> label)
        and 'scores' (model -> confidence), or None on total failure.
    """
    try:
        # Guard: empty / whitespace-only input gets a neutral placeholder.
        if not text.strip():
            return {
                'verdict': 'neutral',
                'confidence': 'Low (No text)',
                'breakdown': {'Error': 'No text provided'},
                'scores': {'Error': 0.0}
            }

        # model name -> (sentiment label, confidence score)
        results = {}

        # --- RoBERTa (3-class). Input truncated to 512 chars to stay
        # within the model's sequence limit; LABEL_0/1/2 = neg/neu/pos.
        if roberta_model is not None:
            try:
                rob_out = roberta_model(text[:512])[0]
                rob_label = rob_out['label']

                if rob_label == 'LABEL_0':
                    rob_sent = 'negative'
                elif rob_label == 'LABEL_1':
                    rob_sent = 'neutral'
                else:
                    rob_sent = 'positive'
                results['roberta'] = (rob_sent, rob_out['score'])
            except Exception as e:
                # Model failure is non-fatal: the ensemble continues without it.
                st.warning(f"RoBERTa model unavailable: {e}")

        # --- VADER (always loaded). Standard compound-score thresholds:
        # >= +0.05 positive, <= -0.05 negative, else neutral.
        vader_out = vader_model.polarity_scores(text)
        compound = vader_out['compound']
        if compound >= 0.05:
            vader_sent = 'positive'
        elif compound <= -0.05:
            vader_sent = 'negative'
        else:
            vader_sent = 'neutral'
        # abs(compound) reported as a pseudo-confidence in [0, 1].
        results['vader'] = (vader_sent, abs(compound))

        # --- DistilBERT (binary). Labels arrive as POSITIVE/NEGATIVE.
        if distilbert_model is not None:
            try:
                bert_out = distilbert_model(text[:512])[0]
                bert_sent = bert_out['label'].lower()
                results['distilbert'] = (bert_sent, bert_out['score'])
            except Exception as e:
                st.warning(f"DistilBERT model unavailable: {e}")

        # Only VADER produced a result: report it with medium confidence.
        if len(results) == 1 and 'vader' in results:
            return {
                'verdict': vader_sent,
                'confidence': 'Medium (VADER only)',
                'breakdown': {'VADER': vader_sent},
                'scores': {'VADER': abs(compound)}
            }

        # Majority vote across whichever models succeeded.
        votes = [sent for sent, score in results.values()]
        count = Counter(votes)
        winner, vote_count = count.most_common(1)[0]

        # Every model voted differently -> no majority -> ambiguous.
        # (vote_count == 1 is implied by len(count) == len(results);
        # kept as a belt-and-braces check.)
        if len(count) == len(results) or vote_count == 1:
            final_verdict = "ambiguous"
            confidence = f"Low ({vote_count}/{len(results)} agreement)"
        else:
            final_verdict = winner
            # Unanimous -> High, plain majority -> Medium.
            confidence = "High" if vote_count == len(results) else "Medium"

        return {
            'verdict': final_verdict,
            'confidence': confidence,
            'breakdown': {model: sent for model, (sent, score) in results.items()},
            'scores': {model: score for model, (sent, score) in results.items()}
        }

    except Exception as e:
        st.error(f"Analysis error: {e}")
        return None
|
|
|
|
|
def analyze_multilingual(text):
    """Sentiment analysis for Hindi / Hinglish text.

    Uses the multilingual star-rating model when available; falls back
    to the English ensemble if the model is missing or raises.

    Returns:
        dict with the same keys as ``analyze_english`` ('verdict',
        'confidence', 'breakdown', 'scores'), or that function's result
        on fallback.
    """
    try:
        if not text.strip():
            # Same empty-input placeholder shape as analyze_english().
            return {
                'verdict': 'neutral',
                'confidence': 'Low (No text)',
                'breakdown': {'Error': 'No text provided'},
                'scores': {'Error': 0.0}
            }

        if multi_model is None:
            # Model never loaded — degrade to the English pipeline.
            st.info("๐ Multilingual model unavailable, using English analysis...")
            return analyze_english(text)

        prediction = multi_model(text[:512])[0]
        label_raw = str(prediction['label'])
        score = prediction['score']

        # nlptown labels are star ratings ("1 star" .. "5 stars"):
        # 1-2 stars -> negative, 3 -> neutral, 4-5 -> positive.
        if '1' in label_raw or '2' in label_raw:
            sentiment = "negative"
        elif '3' in label_raw:
            sentiment = "neutral"
        else:
            sentiment = "positive"

        return {
            'verdict': sentiment,
            'confidence': f"{score:.2f}",
            'breakdown': {'Multilingual BERT': f"{sentiment.title()} ({score:.2f})"},
            'scores': {'Model Confidence': score}
        }

    except Exception as e:
        st.error(f"Multilingual analysis error: {e}")
        # Any runtime failure also falls back to the English ensemble.
        return analyze_english(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Sidebar: language / mode selection and model availability report.
# ---------------------------------------------------------------------------
st.sidebar.title("โ๏ธ Configuration")
language = st.sidebar.selectbox("Select Language", ["English", "Hindi (เคนเคฟเคจเฅเคฆเฅ)", "Hinglish (Mixed)"])
mode = st.sidebar.selectbox("Analysis Mode", ["Real-time Analysis", "Batch Processing"])

st.sidebar.markdown("---")
# BUG FIX: this was a plain string, so the {...} conditionals were shown
# literally to the user instead of being evaluated — it must be an f-string.
st.sidebar.info(f"""
**Model Status:**
- โ VADER: Available
- ๐ค RoBERTa: {'โ' if roberta_model else 'โ'}
- ๐ DistilBERT: {'โ' if distilbert_model else 'โ'}
- ๐ Multilingual: {'โ' if multi_model else 'โ'}
""")
|
|
|
|
|
# ---------------------------------------------------------------------------
# Main page header.
# ---------------------------------------------------------------------------
st.title("๐ง Sentiment Analytics Pro")
st.markdown("Advanced AI-powered sentiment analysis across multiple languages")
st.markdown("---")
|
|
|
|
|
if mode == "Real-time Analysis":
    # ------------------------------------------------------------------
    # Real-time mode: analyze a single piece of text typed by the user.
    # ------------------------------------------------------------------

    # Language-specific label and placeholder for the input box.
    # BUG FIX: the Hindi placeholder literal had been split across two
    # source lines (invalid syntax); rejoined into one string.
    if language == "Hindi (เคนเคฟเคจเฅเคฆเฅ)":
        placeholder_text = "เคฏเคนเคพเค เค เคชเคจเคพ เคเฅเคเฅเคธเฅเค เคฒเคฟเคเฅเค (เคเคฆเคพ. เคฎเฅเคเฅ เคฏเคน เคเคคเฅเคชเคพเคฆ เคชเคธเคเคฆ เคเคฏเคพ)"
        label_text = "Enter Hindi Text:"
    elif language == "Hinglish (Mixed)":
        placeholder_text = "Type in Hinglish (e.g., Product bahut achha hai but delivery slow thi)"
        label_text = "Enter Hinglish Text:"
    else:
        placeholder_text = "Type your text here... (e.g., I love this product! Amazing quality.)"
        label_text = "Enter English Text:"

    user_input = st.text_area(label_text, height=150, placeholder=placeholder_text)

    if st.button("๐ Analyze Sentiment", type="primary", use_container_width=True):
        if not user_input.strip():
            st.warning("โ ๏ธ Please enter some text first.")
        else:
            with st.spinner("๐ฎ Analyzing sentiment with AI models..."):
                start_time = time.time()

                # Route to the English ensemble or the multilingual model.
                if language == "English":
                    result = analyze_english(user_input)
                else:
                    result = analyze_multilingual(user_input)

                if result is None:
                    st.error("โ Analysis failed. Please try again with different text.")
                    st.stop()

                latency = time.time() - start_time

            # --- Headline metrics: verdict, confidence, latency ---------
            st.markdown("### ๐ Analysis Results")
            col1, col2, col3 = st.columns(3)

            # Verdict -> accent color for the metric card.
            color_map = {
                'positive': '#10B981',
                'negative': '#EF4444',
                'neutral': '#F59E0B',
                'ambiguous': '#6B7280'
            }
            verdict_color = color_map.get(result['verdict'], '#3B82F6')

            with col1:
                st.markdown(f"""
                <div class="metric-box">
                    <h2 style='color: {verdict_color}; margin:0;'>{result['verdict'].upper()}</h2>
                    <p style='margin:0;'>Final Verdict</p>
                </div>
                """, unsafe_allow_html=True)

            with col2:
                st.markdown(f"""
                <div class="metric-box">
                    <h2>{result['confidence']}</h2>
                    <p style='margin:0;'>Confidence Level</p>
                </div>
                """, unsafe_allow_html=True)

            with col3:
                st.markdown(f"""
                <div class="metric-box">
                    <h2>{latency:.3f}s</h2>
                    <p style='margin:0;'>Processing Time</p>
                </div>
                """, unsafe_allow_html=True)

            # --- Per-model breakdown and confidence chart ----------------
            st.markdown("---")
            c1, c2 = st.columns([1, 1])

            with c1:
                st.subheader("๐ Model Consensus")
                if language == "English" and len(result['breakdown']) > 1:
                    df_breakdown = pd.DataFrame(
                        list(result['breakdown'].items()),
                        columns=['Model', 'Prediction']
                    )
                    st.table(df_breakdown)

                    if result['verdict'] == 'ambiguous':
                        st.error("โ ๏ธ Conflict Detected: Models disagree. Human review recommended.")
                else:
                    for model, prediction in result['breakdown'].items():
                        st.info(f"**{model}**: {prediction}")

            with c2:
                st.subheader("๐ Confidence Scores")
                if result['scores']:
                    df_scores = pd.DataFrame(
                        list(result['scores'].items()),
                        columns=['Source', 'Score']
                    )
                    fig = px.bar(
                        df_scores,
                        x='Source',
                        y='Score',
                        range_y=[0, 1],
                        color='Score',
                        color_continuous_scale='Blues'
                    )
                    fig.update_layout(showlegend=False)
                    st.plotly_chart(fig, use_container_width=True)

            # --- Word cloud (only worthwhile for non-trivial input) ------
            if len(user_input) > 10:
                st.subheader("โ๏ธ Contextual Word Cloud")
                try:
                    cleaned = clean_text(user_input)
                    if len(cleaned.split()) >= 3:
                        fig_wc = get_wordcloud(cleaned)
                        if fig_wc:
                            st.pyplot(fig_wc)
                        else:
                            st.info("๐ Word cloud not available for this text.")
                    else:
                        st.info("๐ Add more text for word cloud visualization.")
                except Exception:
                    st.info("๐ Word cloud not available for this text type.")

            # --- Feedback capture (currently display-only) ---------------
            st.markdown("---")
            with st.expander("๐ Help Improve Accuracy (Report Incorrect Results)"):
                st.write("Your feedback helps train better AI models!")
                feedback = st.radio("What should the correct sentiment be?",
                                    ["Positive", "Negative", "Neutral"],
                                    horizontal=True)

                # NOTE(review): feedback is acknowledged but not persisted
                # anywhere in this file — confirm whether storage is intended.
                if st.button("Submit Correction"):
                    st.success("""
                    โ Thank you! Your feedback has been recorded.
                    This helps improve the AI model for everyone.
                    """)
|
|
|
|
|
elif mode == "Batch Processing": |
|
|
st.info("๐ Upload a CSV file with a 'text' column for batch analysis") |
|
|
uploaded_file = st.file_uploader("Choose CSV file", type=['csv']) |
|
|
|
|
|
if uploaded_file is not None: |
|
|
try: |
|
|
df = pd.read_csv(uploaded_file) |
|
|
if 'text' not in df.columns: |
|
|
st.error("โ CSV file must contain a column named 'text'") |
|
|
else: |
|
|
st.success(f"โ
Loaded {len(df)} records") |
|
|
|
|
|
if st.button("๐ฎ Process Batch Analysis", type="primary", use_container_width=True): |
|
|
results = [] |
|
|
progress_bar = st.progress(0) |
|
|
status_text = st.empty() |
|
|
|
|
|
for i, row in df.iterrows(): |
|
|
status_text.text(f"Processing {i+1}/{len(df)}...") |
|
|
txt = str(row['text']) |
|
|
|
|
|
if language == "English": |
|
|
res = analyze_english(txt) |
|
|
else: |
|
|
res = analyze_multilingual(txt) |
|
|
|
|
|
if res: |
|
|
results.append(res['verdict']) |
|
|
else: |
|
|
results.append('analysis_error') |
|
|
|
|
|
progress_bar.progress((i + 1) / len(df)) |
|
|
|
|
|
status_text.text("โ
Analysis complete!") |
|
|
|
|
|
|
|
|
df['sentiment'] = results |
|
|
|
|
|
|
|
|
st.subheader("๐ Analysis Results") |
|
|
st.dataframe(df, use_container_width=True) |
|
|
|
|
|
|
|
|
st.subheader("๐ Summary Statistics") |
|
|
sentiment_counts = df['sentiment'].value_counts() |
|
|
col1, col2, col3 = st.columns(3) |
|
|
|
|
|
with col1: |
|
|
st.metric("Total Records", len(df)) |
|
|
with col2: |
|
|
st.metric("Positive", sentiment_counts.get('positive', 0)) |
|
|
with col3: |
|
|
st.metric("Negative", sentiment_counts.get('negative', 0)) |
|
|
|
|
|
|
|
|
csv = df.to_csv(index=False).encode('utf-8') |
|
|
st.download_button( |
|
|
"๐พ Download Results CSV", |
|
|
csv, |
|
|
"sentiment_analysis_results.csv", |
|
|
"text/csv", |
|
|
use_container_width=True |
|
|
) |
|
|
|
|
|
except Exception as e: |
|
|
st.error(f"โ Error processing file: {str(e)}") |
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Footer.
# ---------------------------------------------------------------------------
st.markdown("---")
st.markdown(
    "<div style='text-align: center; color: #6B7280;'>"
    "Built with โค๏ธ using Streamlit & Hugging Face Transformers"
    "</div>",
    unsafe_allow_html=True
)