Update app.py
Browse files
app.py
CHANGED
|
@@ -35,122 +35,223 @@ st.markdown("""
|
|
| 35 |
# ------------------------------------------------------------------
|
| 36 |
@st.cache_resource
|
| 37 |
def load_models():
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
#
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# ------------------------------------------------------------------
|
| 53 |
# HELPER FUNCTIONS
|
| 54 |
# ------------------------------------------------------------------
|
| 55 |
def clean_text(text):
|
| 56 |
text = text.lower()
|
| 57 |
-
text = re.sub(r'http\S+', '', text)
|
| 58 |
-
text = re.sub(r'[^\w\s]', '', text)
|
| 59 |
return text
|
| 60 |
|
| 61 |
def get_wordcloud(text):
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
# ------------------------------------------------------------------
|
| 70 |
# CORE ANALYSIS LOGIC
|
| 71 |
# ------------------------------------------------------------------
|
| 72 |
|
| 73 |
def analyze_english(text):
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
}
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
def analyze_multilingual(text):
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
# ------------------------------------------------------------------
|
| 145 |
# UI LAYOUT
|
| 146 |
# ------------------------------------------------------------------
|
| 147 |
|
| 148 |
# Sidebar
|
| 149 |
-
st.sidebar.title("Configuration")
|
| 150 |
language = st.sidebar.selectbox("Select Language", ["English", "Hindi (เคนเคฟเคจเฅเคฆเฅ)", "Hinglish (Mixed)"])
|
| 151 |
-
mode = st.sidebar.selectbox("Mode", ["Real-time Analysis", "Batch Processing"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
-
st.title("๐ง Sentiment Analytics
|
|
|
|
| 154 |
st.markdown("---")
|
| 155 |
|
| 156 |
if mode == "Real-time Analysis":
|
|
@@ -163,33 +264,41 @@ if mode == "Real-time Analysis":
|
|
| 163 |
placeholder_text = "Type in Hinglish (e.g., Product bahut achha hai but delivery slow thi)"
|
| 164 |
label_text = "Enter Hinglish Text:"
|
| 165 |
else:
|
| 166 |
-
placeholder_text = "Type your text here..."
|
| 167 |
label_text = "Enter English Text:"
|
| 168 |
|
| 169 |
user_input = st.text_area(label_text, height=150, placeholder=placeholder_text)
|
| 170 |
|
| 171 |
-
if st.button("Analyze Sentiment", type="primary"):
|
| 172 |
if not user_input.strip():
|
| 173 |
-
st.warning("Please enter some text first.")
|
| 174 |
else:
|
| 175 |
-
with st.spinner("
|
| 176 |
start_time = time.time()
|
| 177 |
|
| 178 |
# Routing Logic
|
| 179 |
if language == "English":
|
| 180 |
result = analyze_english(user_input)
|
| 181 |
else:
|
| 182 |
-
# Both Hindi and Hinglish use the Multilingual Model
|
| 183 |
result = analyze_multilingual(user_input)
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
latency = time.time() - start_time
|
| 186 |
|
| 187 |
# 1. Main Verdict Display
|
| 188 |
-
st.markdown("### Analysis Results")
|
| 189 |
col1, col2, col3 = st.columns(3)
|
| 190 |
|
| 191 |
-
color_map = {
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
with col1:
|
| 195 |
st.markdown(f"""
|
|
@@ -215,85 +324,145 @@ if mode == "Real-time Analysis":
|
|
| 215 |
</div>
|
| 216 |
""", unsafe_allow_html=True)
|
| 217 |
|
| 218 |
-
# 2. Detailed Breakdown
|
| 219 |
st.markdown("---")
|
| 220 |
c1, c2 = st.columns([1, 1])
|
| 221 |
|
| 222 |
with c1:
|
| 223 |
st.subheader("๐ Model Consensus")
|
| 224 |
-
if language == "English":
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
| 227 |
st.table(df_breakdown)
|
| 228 |
|
| 229 |
if result['verdict'] == 'ambiguous':
|
| 230 |
st.error("โ ๏ธ Conflict Detected: Models disagree. Human review recommended.")
|
| 231 |
else:
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
st.caption("Note: XLM-RoBERTa is optimized for 100+ languages including Hindi & Code-mixed text.")
|
| 235 |
|
| 236 |
with c2:
|
| 237 |
-
# Confidence Chart
|
| 238 |
st.subheader("๐ Confidence Scores")
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
-
# 3.
|
| 244 |
-
if
|
| 245 |
st.subheader("โ๏ธ Contextual Word Cloud")
|
| 246 |
try:
|
| 247 |
cleaned = clean_text(user_input)
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
-
# 4. Human Feedback Loop
|
| 254 |
st.markdown("---")
|
| 255 |
-
with st.expander("๐
|
| 256 |
-
st.write("
|
| 257 |
-
feedback = st.radio("
|
|
|
|
|
|
|
| 258 |
|
| 259 |
-
if st.button("Submit
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
"user_correction": feedback,
|
| 265 |
-
"timestamp": datetime.now().isoformat()
|
| 266 |
-
}
|
| 267 |
-
# In production, this would go to a database
|
| 268 |
-
st.success("โ
Feedback Logged. This sample has been added to the retraining queue.")
|
| 269 |
|
| 270 |
elif mode == "Batch Processing":
|
| 271 |
-
st.info("Upload a CSV file
|
| 272 |
-
uploaded_file = st.file_uploader("
|
| 273 |
|
| 274 |
-
if uploaded_file:
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
if
|
| 278 |
-
|
| 279 |
-
|
|
|
|
| 280 |
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
res = analyze_english(txt)
|
| 286 |
-
else:
|
| 287 |
-
res = analyze_multilingual(txt)
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# ------------------------------------------------------------------
|
| 36 |
@st.cache_resource
def load_models():
    """Load and cache every sentiment model used by the app.

    Returns:
        tuple: ``(roberta, distilbert, vader, multilingual)``. Any slot may
        be ``None`` if that model failed to load; VADER is the only member
        every fallback path tries to keep alive.
    """
    try:
        st.info("🔄 Loading AI models... This may take a few minutes on first run.")

        # English Models (Ensemble)
        roberta = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest"
        )

        distilbert = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english"
        )

        vader = SentimentIntensityAnalyzer()

        # Use a more stable multilingual model
        multilingual = pipeline(
            "sentiment-analysis",
            model="nlptown/bert-base-multilingual-uncased-sentiment"
        )

        st.success("✅ All models loaded successfully!")
        return roberta, distilbert, vader, multilingual

    except Exception as e:
        st.error(f"❌ Error loading models: {str(e)}")
        # Return fallback models: try the default English pipeline first,
        # and if even that fails keep only rule-based VADER.
        try:
            vader = SentimentIntensityAnalyzer()
            distilbert = pipeline("sentiment-analysis")
            return None, distilbert, vader, None
        except Exception:
            # BUGFIX: was a bare `except:`, which would also swallow
            # SystemExit/KeyboardInterrupt. Narrowed to Exception.
            return None, None, SentimentIntensityAnalyzer(), None
| 73 |
+
|
| 74 |
+
# Load models with progress indication
|
| 75 |
+
with st.spinner("Initializing AI models..."):
|
| 76 |
+
roberta_model, distilbert_model, vader_model, multi_model = load_models()
|
| 77 |
|
| 78 |
+
# Check if essential models loaded
|
| 79 |
+
if vader_model is None:
|
| 80 |
+
st.error("โ Critical error: Failed to load essential models. Please refresh the page.")
|
| 81 |
+
st.stop()
|
| 82 |
|
| 83 |
# ------------------------------------------------------------------
|
| 84 |
# HELPER FUNCTIONS
|
| 85 |
# ------------------------------------------------------------------
|
| 86 |
def clean_text(text):
    """Normalize raw input: lowercase it, then strip URLs and punctuation."""
    lowered = text.lower()
    without_urls = re.sub(r'http\S+', '', lowered)
    return re.sub(r'[^\w\s]', '', without_urls)
| 91 |
|
| 92 |
def get_wordcloud(text):
    """Render *text* as a word-cloud matplotlib figure.

    Returns the figure, or None (with an error shown in the UI) if
    generation or plotting fails.
    """
    try:
        cloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
            max_words=100,
            colormap='viridis',
        ).generate(text)
        figure, axis = plt.subplots(figsize=(10, 5))
        axis.imshow(cloud, interpolation='bilinear')
        axis.axis('off')
        return figure
    except Exception as e:
        st.error(f"WordCloud error: {e}")
        return None
| 108 |
|
| 109 |
# ------------------------------------------------------------------
|
| 110 |
# CORE ANALYSIS LOGIC
|
| 111 |
# ------------------------------------------------------------------
|
| 112 |
|
| 113 |
def analyze_english(text):
    """Ensemble sentiment analysis for English text.

    Runs RoBERTa and DistilBERT (when loaded) plus VADER, then takes a
    majority vote. Returns a dict with keys 'verdict', 'confidence',
    'breakdown' and 'scores', or None on unexpected failure.
    """
    try:
        # Ensure text is not empty
        if not text.strip():
            return {
                'verdict': 'neutral',
                'confidence': 'Low (No text)',
                'breakdown': {'Error': 'No text provided'},
                'scores': {'Error': 0.0}
            }

        results = {}

        # 1. RoBERTa (if available). Input is truncated to 512 chars as a
        # rough guard against the model's sequence limit.
        if roberta_model is not None:
            try:
                rob_out = roberta_model(text[:512])[0]
                # BUGFIX: the "-latest" cardiffnlp checkpoint emits
                # human-readable labels ("negative"/"neutral"/"positive"),
                # not "LABEL_0".."LABEL_2". The old equality checks against
                # 'LABEL_0'/'LABEL_1' never matched, so every prediction
                # fell through to 'positive'. Accept both label schemes.
                rob_label = rob_out['label'].lower()
                if rob_label in ('label_0', 'negative'):
                    rob_sent = 'negative'
                elif rob_label in ('label_1', 'neutral'):
                    rob_sent = 'neutral'
                else:
                    rob_sent = 'positive'
                results['roberta'] = (rob_sent, rob_out['score'])
            except Exception as e:
                st.warning(f"RoBERTa model unavailable: {e}")

        # 2. VADER (always available). Standard compound thresholds:
        # >= 0.05 positive, <= -0.05 negative, else neutral.
        vader_out = vader_model.polarity_scores(text)
        compound = vader_out['compound']
        if compound >= 0.05:
            vader_sent = 'positive'
        elif compound <= -0.05:
            vader_sent = 'negative'
        else:
            vader_sent = 'neutral'
        results['vader'] = (vader_sent, abs(compound))

        # 3. DistilBERT (if available)
        if distilbert_model is not None:
            try:
                bert_out = distilbert_model(text[:512])[0]
                bert_sent = bert_out['label'].lower()
                results['distilbert'] = (bert_sent, bert_out['score'])
            except Exception as e:
                st.warning(f"DistilBERT model unavailable: {e}")

        # If only VADER is available, skip voting entirely.
        if len(results) == 1 and 'vader' in results:
            return {
                'verdict': vader_sent,
                'confidence': 'Medium (VADER only)',
                'breakdown': {'VADER': vader_sent},
                'scores': {'VADER': abs(compound)}
            }

        # Consensus Logic (Voting)
        votes = [sent for sent, score in results.values()]
        count = Counter(votes)
        winner, vote_count = count.most_common(1)[0]

        # Conflict Detection: every model voted differently -> ambiguous.
        if len(count) == len(results) or vote_count == 1:
            final_verdict = "ambiguous"
            confidence = f"Low ({vote_count}/{len(results)} agreement)"
        else:
            final_verdict = winner
            confidence = "High" if vote_count == len(results) else "Medium"

        return {
            'verdict': final_verdict,
            'confidence': confidence,
            'breakdown': {model: sent for model, (sent, score) in results.items()},
            'scores': {model: score for model, (sent, score) in results.items()}
        }

    except Exception as e:
        st.error(f"Analysis error: {e}")
        return None
| 194 |
|
| 195 |
def analyze_multilingual(text):
    """Sentiment analysis for Hindi / code-mixed text via a multilingual model.

    Returns the same result-dict shape as analyze_english(): keys
    'verdict', 'confidence', 'breakdown', 'scores'. Falls back to
    analyze_english() when the multilingual model is missing or raises.
    """
    try:
        if not text.strip():
            # Empty input short-circuits to a neutral, low-confidence result.
            return {
                'verdict': 'neutral',
                'confidence': 'Low (No text)',
                'breakdown': {'Error': 'No text provided'},
                'scores': {'Error': 0.0}
            }

        # Use multilingual model if available, otherwise fallback to English analysis
        if multi_model is not None:
            # Truncated to 512 chars as a rough guard against the model's
            # sequence limit (chars, not tokens — intentional approximation).
            result = multi_model(text[:512])[0]
            label_raw = str(result['label'])
            score = result['score']

            # Map star ratings to sentiment (nlptown model uses 1-5 stars,
            # e.g. "4 stars"): 1-2 negative, 3 neutral, 4-5 positive.
            if '1' in label_raw or '2' in label_raw:
                sentiment = "negative"
            elif '3' in label_raw:
                sentiment = "neutral"
            else:  # 4 or 5 stars
                sentiment = "positive"

            return {
                'verdict': sentiment,
                'confidence': f"{score:.2f}",
                'breakdown': {'Multilingual BERT': f"{sentiment.title()} ({score:.2f})"},
                'scores': {'Model Confidence': score}
            }
        else:
            # Fallback to English analysis
            st.info("🔄 Multilingual model unavailable, using English analysis...")
            return analyze_english(text)

    except Exception as e:
        st.error(f"Multilingual analysis error: {e}")
        # Fallback to English analysis
        return analyze_english(text)
| 234 |
|
| 235 |
# ------------------------------------------------------------------
|
| 236 |
# UI LAYOUT
|
| 237 |
# ------------------------------------------------------------------
|
| 238 |
|
| 239 |
# Sidebar
st.sidebar.title("⚙️ Configuration")
language = st.sidebar.selectbox("Select Language", ["English", "Hindi (हिन्दी)", "Hinglish (Mixed)"])
mode = st.sidebar.selectbox("Analysis Mode", ["Real-time Analysis", "Batch Processing"])

st.sidebar.markdown("---")
# BUGFIX: this was a plain (non-f) triple-quoted string, so the
# {'✅' if ...} placeholders were displayed verbatim instead of being
# evaluated. It must be an f-string for the status flags to interpolate.
st.sidebar.info(f"""
**Model Status:**
- ✅ VADER: Available
- 🤖 RoBERTa: {'✅' if roberta_model else '❌'}
- 📝 DistilBERT: {'✅' if distilbert_model else '❌'}
- 🌍 Multilingual: {'✅' if multi_model else '❌'}
""")

st.title("🧠 Sentiment Analytics Pro")
st.markdown("Advanced AI-powered sentiment analysis across multiple languages")
st.markdown("---")
| 256 |
|
| 257 |
if mode == "Real-time Analysis":
|
|
|
|
| 264 |
placeholder_text = "Type in Hinglish (e.g., Product bahut achha hai but delivery slow thi)"
|
| 265 |
label_text = "Enter Hinglish Text:"
|
| 266 |
else:
|
| 267 |
+
placeholder_text = "Type your text here... (e.g., I love this product! Amazing quality.)"
|
| 268 |
label_text = "Enter English Text:"
|
| 269 |
|
| 270 |
user_input = st.text_area(label_text, height=150, placeholder=placeholder_text)
|
| 271 |
|
| 272 |
+
if st.button("๐ Analyze Sentiment", type="primary", use_container_width=True):
|
| 273 |
if not user_input.strip():
|
| 274 |
+
st.warning("โ ๏ธ Please enter some text first.")
|
| 275 |
else:
|
| 276 |
+
with st.spinner("๐ฎ Analyzing sentiment with AI models..."):
|
| 277 |
start_time = time.time()
|
| 278 |
|
| 279 |
# Routing Logic
|
| 280 |
if language == "English":
|
| 281 |
result = analyze_english(user_input)
|
| 282 |
else:
|
|
|
|
| 283 |
result = analyze_multilingual(user_input)
|
| 284 |
|
| 285 |
+
if result is None:
|
| 286 |
+
st.error("โ Analysis failed. Please try again with different text.")
|
| 287 |
+
st.stop()
|
| 288 |
+
|
| 289 |
latency = time.time() - start_time
|
| 290 |
|
| 291 |
# 1. Main Verdict Display
|
| 292 |
+
st.markdown("### ๐ Analysis Results")
|
| 293 |
col1, col2, col3 = st.columns(3)
|
| 294 |
|
| 295 |
+
color_map = {
|
| 296 |
+
'positive': '#10B981',
|
| 297 |
+
'negative': '#EF4444',
|
| 298 |
+
'neutral': '#F59E0B',
|
| 299 |
+
'ambiguous': '#6B7280'
|
| 300 |
+
}
|
| 301 |
+
verdict_color = color_map.get(result['verdict'], '#3B82F6')
|
| 302 |
|
| 303 |
with col1:
|
| 304 |
st.markdown(f"""
|
|
|
|
| 324 |
</div>
|
| 325 |
""", unsafe_allow_html=True)
|
| 326 |
|
| 327 |
+
# 2. Detailed Breakdown
|
| 328 |
st.markdown("---")
|
| 329 |
c1, c2 = st.columns([1, 1])
|
| 330 |
|
| 331 |
with c1:
|
| 332 |
st.subheader("๐ Model Consensus")
|
| 333 |
+
if language == "English" and len(result['breakdown']) > 1:
|
| 334 |
+
df_breakdown = pd.DataFrame(
|
| 335 |
+
list(result['breakdown'].items()),
|
| 336 |
+
columns=['Model', 'Prediction']
|
| 337 |
+
)
|
| 338 |
st.table(df_breakdown)
|
| 339 |
|
| 340 |
if result['verdict'] == 'ambiguous':
|
| 341 |
st.error("โ ๏ธ Conflict Detected: Models disagree. Human review recommended.")
|
| 342 |
else:
|
| 343 |
+
for model, prediction in result['breakdown'].items():
|
| 344 |
+
st.info(f"**{model}**: {prediction}")
|
|
|
|
| 345 |
|
| 346 |
with c2:
|
|
|
|
| 347 |
st.subheader("๐ Confidence Scores")
|
| 348 |
+
if result['scores']:
|
| 349 |
+
df_scores = pd.DataFrame(
|
| 350 |
+
list(result['scores'].items()),
|
| 351 |
+
columns=['Source', 'Score']
|
| 352 |
+
)
|
| 353 |
+
fig = px.bar(
|
| 354 |
+
df_scores,
|
| 355 |
+
x='Source',
|
| 356 |
+
y='Score',
|
| 357 |
+
range_y=[0,1],
|
| 358 |
+
color='Score',
|
| 359 |
+
color_continuous_scale='Blues'
|
| 360 |
+
)
|
| 361 |
+
fig.update_layout(showlegend=False)
|
| 362 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 363 |
|
| 364 |
+
# 3. Word Cloud
|
| 365 |
+
if len(user_input) > 10:
|
| 366 |
st.subheader("โ๏ธ Contextual Word Cloud")
|
| 367 |
try:
|
| 368 |
cleaned = clean_text(user_input)
|
| 369 |
+
if len(cleaned.split()) >= 3: # Only generate if enough words
|
| 370 |
+
fig_wc = get_wordcloud(cleaned)
|
| 371 |
+
if fig_wc:
|
| 372 |
+
st.pyplot(fig_wc)
|
| 373 |
+
else:
|
| 374 |
+
st.info("๐ Word cloud not available for this text.")
|
| 375 |
+
else:
|
| 376 |
+
st.info("๐ Add more text for word cloud visualization.")
|
| 377 |
+
except Exception as e:
|
| 378 |
+
st.info("๐ Word cloud not available for this text type.")
|
| 379 |
|
| 380 |
+
# 4. Human Feedback Loop
|
| 381 |
st.markdown("---")
|
| 382 |
+
with st.expander("๐ Help Improve Accuracy (Report Incorrect Results)"):
|
| 383 |
+
st.write("Your feedback helps train better AI models!")
|
| 384 |
+
feedback = st.radio("What should the correct sentiment be?",
|
| 385 |
+
["Positive", "Negative", "Neutral"],
|
| 386 |
+
horizontal=True)
|
| 387 |
|
| 388 |
+
if st.button("Submit Correction"):
|
| 389 |
+
st.success("""
|
| 390 |
+
โ
Thank you! Your feedback has been recorded.
|
| 391 |
+
This helps improve the AI model for everyone.
|
| 392 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
elif mode == "Batch Processing":
    st.info("📁 Upload a CSV file with a 'text' column for batch analysis")
    uploaded_file = st.file_uploader("Choose CSV file", type=['csv'])

    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
            if 'text' not in df.columns:
                st.error("❌ CSV file must contain a column named 'text'")
            else:
                st.success(f"✅ Loaded {len(df)} records")

                if st.button("🔮 Process Batch Analysis", type="primary", use_container_width=True):
                    results = []
                    progress_bar = st.progress(0)
                    status_text = st.empty()

                    for i, row in df.iterrows():
                        status_text.text(f"Processing {i+1}/{len(df)}...")
                        txt = str(row['text'])

                        # Route each row to the language-appropriate analyzer
                        # selected in the sidebar.
                        if language == "English":
                            res = analyze_english(txt)
                        else:
                            res = analyze_multilingual(txt)

                        # Analyzers return None on failure; record a marker
                        # so the output row count always matches the input.
                        if res:
                            results.append(res['verdict'])
                        else:
                            results.append('analysis_error')

                        # NOTE(review): assumes the default RangeIndex from
                        # read_csv; a re-indexed frame would break this
                        # progress math — confirm if df is ever re-indexed.
                        progress_bar.progress((i + 1) / len(df))

                    status_text.text("✅ Analysis complete!")

                    # Add results to dataframe
                    df['sentiment'] = results

                    # Show results
                    st.subheader("📊 Analysis Results")
                    st.dataframe(df, use_container_width=True)

                    # Show summary
                    st.subheader("📈 Summary Statistics")
                    sentiment_counts = df['sentiment'].value_counts()
                    col1, col2, col3 = st.columns(3)

                    with col1:
                        st.metric("Total Records", len(df))
                    with col2:
                        st.metric("Positive", sentiment_counts.get('positive', 0))
                    with col3:
                        st.metric("Negative", sentiment_counts.get('negative', 0))

                    # Download
                    csv = df.to_csv(index=False).encode('utf-8')
                    st.download_button(
                        "💾 Download Results CSV",
                        csv,
                        "sentiment_analysis_results.csv",
                        "text/csv",
                        use_container_width=True
                    )

        except Exception as e:
            st.error(f"❌ Error processing file: {str(e)}")
| 460 |
+
|
| 461 |
+
# Footer
|
| 462 |
+
st.markdown("---")
|
| 463 |
+
st.markdown(
|
| 464 |
+
"<div style='text-align: center; color: #6B7280;'>"
|
| 465 |
+
"Built with โค๏ธ using Streamlit & Hugging Face Transformers"
|
| 466 |
+
"</div>",
|
| 467 |
+
unsafe_allow_html=True
|
| 468 |
+
)
|