Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import plotly.express as px
|
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
from plotly.subplots import make_subplots
|
| 7 |
import spacy
|
|
|
|
| 8 |
|
| 9 |
# Load the English spaCy model (lightweight, 'sm' for small)
|
| 10 |
try:
|
|
@@ -20,40 +21,29 @@ sentiment_pipeline = pipeline(
|
|
| 20 |
model="distilbert-base-uncased-finetuned-sst-2-english"
|
| 21 |
)
|
| 22 |
|
| 23 |
-
# Store the analyzed dataframe globally
|
| 24 |
-
analyzed_df = None
|
| 25 |
-
|
| 26 |
# --- Function: Detect Passive Voice using spaCy ---
|
| 27 |
def is_passive(text):
    """Checks if a sentence is passive using spaCy's dependency parser.

    Args:
        text: The sentence or line to inspect.

    Returns:
        bool: True when any token is a passive auxiliary (``auxpass``) whose
        head is a past-participle verb (``VERB`` / ``VBN``); False otherwise.
        Non-string values (None, NaN, numbers) and blank strings return False
        without running the parser.
    """
    # This function is applied to raw DataFrame cells, which can be NaN/None;
    # the spaCy pipeline only accepts text, so bail out before parsing.
    if not isinstance(text, str) or not text.strip():
        return False

    doc = nlp(text)
    # A simple heuristic check for passive voice structure:
    # look for a passive auxiliary (auxpass) attached to a past participle.
    for token in doc:
        head = token.head
        if token.dep_ == 'auxpass' and head.pos_ == 'VERB' and head.tag_ == 'VBN':
            return True
    return False
|
| 36 |
|
| 37 |
-
|
| 38 |
def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
|
| 39 |
"""Analyze sentiment and active/passive voice for multiple TXT files or a single CSV file"""
|
| 40 |
-
global
|
| 41 |
-
|
| 42 |
try:
|
| 43 |
-
# Collect all uploaded files
|
| 44 |
files = [f for f in [file1, file2, file3, file4, file5] if f is not None]
|
| 45 |
|
| 46 |
if not files:
|
| 47 |
-
return ("Please upload at least one file",
|
| 48 |
-
None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
|
| 49 |
|
| 50 |
-
# Check if we have TXT files or CSV
|
| 51 |
file_paths = [f.name for f in files]
|
| 52 |
|
| 53 |
if all(path.endswith('.txt') for path in file_paths):
|
| 54 |
-
# Handle multiple TXT files
|
| 55 |
all_data = []
|
| 56 |
-
|
| 57 |
for i, file in enumerate(files, 1):
|
| 58 |
try:
|
| 59 |
with open(file.name, 'r', encoding='utf-8') as f:
|
|
@@ -61,58 +51,32 @@ def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
|
|
| 61 |
except:
|
| 62 |
with open(file.name, 'r', encoding='latin-1') as f:
|
| 63 |
lines = f.readlines()
|
| 64 |
-
|
| 65 |
texts = [line.strip() for line in lines if line.strip()]
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
continue
|
| 69 |
-
|
| 70 |
-
# Create dataframe for this file
|
| 71 |
-
file_df = pd.DataFrame({
|
| 72 |
-
'text': texts,
|
| 73 |
-
'line_number': range(1, len(texts) + 1),
|
| 74 |
-
'file_name': f'File {i}',
|
| 75 |
-
'source_file': file.name.split('/')[-1].split('\\')[-1]
|
| 76 |
-
})
|
| 77 |
-
|
| 78 |
all_data.append(file_df)
|
| 79 |
-
|
| 80 |
if not all_data:
|
| 81 |
-
return ("Error: No valid text found in uploaded files",
|
| 82 |
-
None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
|
| 83 |
-
|
| 84 |
-
# Combine all files
|
| 85 |
df = pd.concat(all_data, ignore_index=True)
|
| 86 |
column_name = 'text'
|
| 87 |
-
|
| 88 |
elif len(files) == 1 and file_paths[0].endswith('.csv'):
|
| 89 |
-
# Handle single CSV file
|
| 90 |
df = pd.read_csv(file_paths[0])
|
| 91 |
-
|
| 92 |
if column_name not in df.columns:
|
| 93 |
-
return (f"Error: Column '{column_name}' not found. Available columns: {', '.join(df.columns)}",
|
| 94 |
-
None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
|
| 95 |
else:
|
| 96 |
-
return ("Error: Either upload multiple TXT files OR a single CSV file (not both)",
|
| 97 |
-
None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
|
| 98 |
|
| 99 |
-
# Analyze sentiment
|
| 100 |
texts = df[column_name].fillna("").astype(str).tolist()
|
| 101 |
results = sentiment_pipeline(texts, truncation=True, max_length=512)
|
| 102 |
-
|
| 103 |
df['sentiment_label'] = [r['label'] for r in results]
|
| 104 |
df['sentiment_score'] = [r['score'] for r in results]
|
| 105 |
-
|
| 106 |
-
# --- New Analysis: Active/Passive Voice ---
|
| 107 |
df['is_passive'] = df[column_name].apply(is_passive)
|
| 108 |
df['voice_label'] = df['is_passive'].apply(lambda x: 'PASSIVE' if x else 'ACTIVE')
|
| 109 |
|
| 110 |
-
analyzed_df = df
|
| 111 |
-
|
| 112 |
# Get all column names except sentiment/voice columns for filter options
|
| 113 |
filter_columns = [col for col in df.columns if col not in ['sentiment_label', 'sentiment_score', 'is_passive', 'voice_label']]
|
| 114 |
|
| 115 |
-
# Create initial summary with file breakdown if multiple TXT files
|
| 116 |
if 'file_name' in df.columns:
|
| 117 |
file_summary = "\n\n📁 FILES UPLOADED:\n"
|
| 118 |
for fname in df['file_name'].unique():
|
|
@@ -122,24 +86,23 @@ def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
|
|
| 122 |
else:
|
| 123 |
summary = create_summary(df, "All Data")
|
| 124 |
|
|
|
|
| 125 |
return (summary, df, None, None, None,
|
| 126 |
gr.update(choices=filter_columns, value='file_name' if 'file_name' in filter_columns else None),
|
| 127 |
gr.update(choices=[], value=None),
|
| 128 |
-
gr.update(choices=[], value=None)
|
|
|
|
| 129 |
|
| 130 |
except Exception as e:
|
| 131 |
-
import traceback
|
| 132 |
traceback.print_exc()
|
| 133 |
-
return f"Error: {str(e)}", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[])
|
| 134 |
|
| 135 |
-
# --- Summary Functions
|
| 136 |
|
| 137 |
def create_summary(df, title):
|
| 138 |
-
"""Generates a summary string including sentiment and voice stats."""
|
| 139 |
total_lines = len(df)
|
| 140 |
positive_pct = (df['sentiment_label'].value_counts(normalize=True).get('POSITIVE', 0) * 100)
|
| 141 |
-
passive_pct = (df['is_passive'].mean() * 100)
|
| 142 |
-
|
| 143 |
summary = (f"--- Summary for {title} ---\n"
|
| 144 |
f"Total Lines Analyzed: {total_lines}\n"
|
| 145 |
f"Positive Sentiment: {positive_pct:.1f}%\n"
|
|
@@ -150,53 +113,43 @@ def create_summary(df, title):
|
|
| 150 |
return summary
|
| 151 |
|
| 152 |
def create_comparison_summary(df1, df2, label1, label2):
    """Generates a comparison summary string.

    The result is a headline naming both groups, followed by the
    ``create_summary`` text for ``df1`` and then ``df2``, with a blank line
    between the sections.
    """
    headline = f"📊 COMPARISON SUMMARY: {label1} vs {label2}\n\n"
    sections = [
        create_summary(df1, label1),
        create_summary(df2, label2),
    ]
    return headline + "\n\n".join(sections)
|
| 158 |
|
| 159 |
-
def get_filter_values(filter_column):
|
| 160 |
-
"""Get unique values for the selected filter column"""
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
if analyzed_df is None or filter_column is None:
|
| 164 |
return gr.update(choices=[]), gr.update(choices=[])
|
| 165 |
|
| 166 |
-
unique_values =
|
| 167 |
unique_values = [str(v) for v in unique_values][:100]
|
| 168 |
|
| 169 |
return gr.update(choices=unique_values, value=None), gr.update(choices=unique_values, value=None)
|
| 170 |
|
| 171 |
-
def compare_groups(filter_column, group1_value, group2_value):
|
| 172 |
-
"""Compare two groups side by side"""
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
if analyzed_df is None:
|
| 176 |
return "Please analyze sentiment first", None, None, None, None
|
| 177 |
|
| 178 |
if not filter_column or not group1_value or not group2_value:
|
| 179 |
return "Please select a filter column and both group values", None, None, None, None
|
| 180 |
|
| 181 |
-
df =
|
| 182 |
|
| 183 |
-
# Filter data for each group
|
| 184 |
df1 = df[df[filter_column].astype(str) == group1_value]
|
| 185 |
df2 = df[df[filter_column].astype(str) == group2_value]
|
| 186 |
|
| 187 |
if len(df1) == 0 or len(df2) == 0:
|
| 188 |
return "One or both groups have no data", None, None, None, None
|
| 189 |
|
| 190 |
-
# Create comparison visualizations
|
| 191 |
fig_pie = create_comparison_pie(df1, df2, group1_value, group2_value)
|
| 192 |
fig_bar = create_comparison_bar(df1, df2, group1_value, group2_value)
|
| 193 |
-
# Using the new voice bar chart instead of a generic histogram
|
| 194 |
fig_voice_bar = create_comparison_voice_bar(df1, df2, group1_value, group2_value)
|
| 195 |
|
| 196 |
-
# Create comparison summary
|
| 197 |
summary = create_comparison_summary(df1, df2, group1_value, group2_value)
|
| 198 |
|
| 199 |
-
# Combine dataframes with group labels
|
| 200 |
df1_display = df1.copy()
|
| 201 |
df1_display['comparison_group'] = group1_value
|
| 202 |
df2_display = df2.copy()
|
|
@@ -207,98 +160,35 @@ def compare_groups(filter_column, group1_value, group2_value):
|
|
| 207 |
|
| 208 |
|
| 209 |
def create_comparison_pie(df1, df2, label1, label2):
    """Plot the sentiment label counts of two groups as side-by-side pies.

    Args:
        df1, df2: DataFrames that contain a ``sentiment_label`` column.
        label1, label2: Group names, used as subplot titles and trace names.

    Returns:
        A one-row, two-column plotly figure with one pie per group.
    """

    def _sentiment_pie(frame, label):
        # POSITIVE slices use '#10b981'; every other label uses '#ef4444'.
        tally = frame['sentiment_label'].value_counts()
        palette = [
            '#10b981' if name == 'POSITIVE' else '#ef4444'
            for name in tally.index
        ]
        return go.Pie(
            labels=tally.index,
            values=tally.values,
            name=label,
            marker_colors=palette,
            textinfo='percent+label+value',
        )

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{'type': 'pie'}, {'type': 'pie'}]],
        subplot_titles=(f'{label1}', f'{label2}'),
    )

    # Group 1 goes in the left cell, group 2 in the right cell.
    fig.add_trace(_sentiment_pie(df1, label1), row=1, col=1)
    fig.add_trace(_sentiment_pie(df2, label2), row=1, col=2)

    fig.update_layout(title_text='Sentiment Distribution Comparison', height=400)
    return fig
|
| 240 |
|
| 241 |
def create_comparison_bar(df1, df2, label1, label2):
    """Plot grouped bars comparing sentiment percentages of two groups.

    Args:
        df1, df2: DataFrames that contain a ``sentiment_label`` column.
        label1, label2: Group names, used as the bar trace names.

    Returns:
        A plotly figure with one bar trace per group over POSITIVE/NEGATIVE.
    """
    pct1 = df1['sentiment_label'].value_counts(normalize=True) * 100
    pct2 = df2['sentiment_label'].value_counts(normalize=True) * 100

    sentiments = ['POSITIVE', 'NEGATIVE']

    def _bars(percentages, label, color):
        # A sentiment that never occurs in the group is drawn as 0%.
        heights = [percentages.get(s, 0) for s in sentiments]
        return go.Bar(
            name=label,
            x=sentiments,
            y=heights,
            marker_color=color,
            text=[f"{h:.1f}%" for h in heights],
            textposition='auto',
        )

    fig = go.Figure()
    fig.add_trace(_bars(pct1, label1, '#3b82f6'))
    fig.add_trace(_bars(pct2, label2, '#ef4444'))

    fig.update_layout(title_text='Sentiment Percentage Comparison', barmode='group', height=400)
    return fig
|
| 271 |
|
| 272 |
-
# --- New Function: Create Voice Comparison Bar Chart ---
|
| 273 |
def create_comparison_voice_bar(df1, df2, label1, label2):
    """Plot grouped bars comparing active/passive percentages of two groups.

    Args:
        df1, df2: DataFrames that contain a ``voice_label`` column.
        label1, label2: Group names, used as the bar trace names.

    Returns:
        A plotly figure with one bar trace per group over ACTIVE/PASSIVE.
    """
    pct1 = df1['voice_label'].value_counts(normalize=True) * 100
    pct2 = df2['voice_label'].value_counts(normalize=True) * 100

    voices = ['ACTIVE', 'PASSIVE']

    def _bars(percentages, label, color):
        # A voice label that never occurs in the group is drawn as 0%.
        heights = [percentages.get(v, 0) for v in voices]
        return go.Bar(
            name=label,
            x=voices,
            y=heights,
            marker_color=color,
            text=[f"{h:.1f}%" for h in heights],
            textposition='auto',
        )

    fig = go.Figure()
    fig.add_trace(_bars(pct1, label1, '#10b981'))
    fig.add_trace(_bars(pct2, label2, '#fbbf24'))

    fig.update_layout(title_text='Active vs. Passive Voice Percentage Comparison', barmode='group', height=400)
    return fig
|
| 303 |
|
| 304 |
|
|
@@ -306,6 +196,8 @@ def create_comparison_voice_bar(df1, df2, label1, label2):
|
|
| 306 |
|
| 307 |
with gr.Blocks(title="Sentiment & Voice Analyzer") as demo:
|
| 308 |
gr.Markdown("# Advanced Text Analyzer: Sentiment, Active vs. Passive Voice")
|
|
|
|
|
|
|
| 309 |
|
| 310 |
with gr.Tab("Analyze Files"):
|
| 311 |
with gr.Row():
|
|
@@ -333,29 +225,33 @@ with gr.Blocks(title="Sentiment & Voice Analyzer") as demo:
|
|
| 333 |
comparison_summary_output = gr.Textbox(label="Comparison Summary", lines=15)
|
| 334 |
comparison_dataframe_output = gr.DataFrame(label="Comparison Data Results")
|
| 335 |
|
| 336 |
-
# Updated output slots for the new voice bar chart
|
| 337 |
comparison_pie_chart = gr.Plot(label="Sentiment Distribution Pie Chart")
|
| 338 |
comparison_bar_chart = gr.Plot(label="Sentiment Percentage Bar Chart")
|
| 339 |
comparison_voice_bar_chart = gr.Plot(label="Active/Passive Voice Bar Chart")
|
| 340 |
|
| 341 |
-
|
| 342 |
# --- Event Handlers ---
|
| 343 |
|
| 344 |
analyze_button.click(
|
| 345 |
fn=analyze_sentiment_files,
|
| 346 |
inputs=[file_input1, file_input2, file_input3, file_input4, file_input5, csv_column_name],
|
| 347 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
)
|
| 349 |
|
|
|
|
| 350 |
filter_col_dropdown.change(
|
| 351 |
fn=get_filter_values,
|
| 352 |
-
inputs=[filter_col_dropdown],
|
| 353 |
outputs=[group1_dropdown, group2_dropdown]
|
| 354 |
)
|
| 355 |
|
|
|
|
| 356 |
compare_button.click(
|
| 357 |
fn=compare_groups,
|
| 358 |
-
inputs=[filter_col_dropdown, group1_dropdown, group2_dropdown],
|
| 359 |
outputs=[comparison_summary_output, comparison_dataframe_output, comparison_pie_chart, comparison_bar_chart, comparison_voice_bar_chart]
|
| 360 |
)
|
| 361 |
|
|
|
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
from plotly.subplots import make_subplots
|
| 7 |
import spacy
|
| 8 |
+
import traceback # Added for better error tracing
|
| 9 |
|
| 10 |
# Load the English spaCy model (lightweight, 'sm' for small)
|
| 11 |
try:
|
|
|
|
| 21 |
model="distilbert-base-uncased-finetuned-sst-2-english"
|
| 22 |
)
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
# --- Function: Detect Passive Voice using spaCy ---
|
| 25 |
def is_passive(text):
    """Report whether ``text`` contains a passive-voice construction.

    The text is parsed with the spaCy pipeline ``nlp`` and counts as passive
    when any token has the dependency label ``auxpass`` and its head is a
    past-participle verb (``pos_ == 'VERB'`` and ``tag_ == 'VBN'``).

    Args:
        text: The sentence or line to inspect.

    Returns:
        bool: True if such a token exists, otherwise False. Non-string values
        (None, NaN, numbers) and blank strings return False without running
        the parser.
    """
    # The caller applies this to raw DataFrame cells, which may be NaN/None;
    # spaCy only accepts text input, so short-circuit instead of raising.
    if not isinstance(text, str) or not text.strip():
        return False

    return any(
        token.dep_ == 'auxpass'
        and token.head.pos_ == 'VERB'
        and token.head.tag_ == 'VBN'
        for token in nlp(text)
    )
|
| 32 |
|
|
|
|
| 33 |
def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
|
| 34 |
"""Analyze sentiment and active/passive voice for multiple TXT files or a single CSV file"""
|
| 35 |
+
# analyzed_df is no longer global, it's returned by this function
|
| 36 |
+
|
| 37 |
try:
|
|
|
|
| 38 |
files = [f for f in [file1, file2, file3, file4, file5] if f is not None]
|
| 39 |
|
| 40 |
if not files:
|
| 41 |
+
return ("Please upload at least one file", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None)
|
|
|
|
| 42 |
|
|
|
|
| 43 |
file_paths = [f.name for f in files]
|
| 44 |
|
| 45 |
if all(path.endswith('.txt') for path in file_paths):
|
|
|
|
| 46 |
all_data = []
|
|
|
|
| 47 |
for i, file in enumerate(files, 1):
|
| 48 |
try:
|
| 49 |
with open(file.name, 'r', encoding='utf-8') as f:
|
|
|
|
| 51 |
except:
|
| 52 |
with open(file.name, 'r', encoding='latin-1') as f:
|
| 53 |
lines = f.readlines()
|
|
|
|
| 54 |
texts = [line.strip() for line in lines if line.strip()]
|
| 55 |
+
if not texts: continue
|
| 56 |
+
file_df = pd.DataFrame({'text': texts, 'line_number': range(1, len(texts) + 1), 'file_name': f'File {i}', 'source_file': file.name.split('/')[-1].split('\\')[-1]})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
all_data.append(file_df)
|
|
|
|
| 58 |
if not all_data:
|
| 59 |
+
return ("Error: No valid text found in uploaded files", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None)
|
|
|
|
|
|
|
|
|
|
| 60 |
df = pd.concat(all_data, ignore_index=True)
|
| 61 |
column_name = 'text'
|
|
|
|
| 62 |
elif len(files) == 1 and file_paths[0].endswith('.csv'):
|
|
|
|
| 63 |
df = pd.read_csv(file_paths[0])
|
|
|
|
| 64 |
if column_name not in df.columns:
|
| 65 |
+
return (f"Error: Column '{column_name}' not found. Available columns: {', '.join(df.columns)}", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None)
|
|
|
|
| 66 |
else:
|
| 67 |
+
return ("Error: Either upload multiple TXT files OR a single CSV file (not both)", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None)
|
|
|
|
| 68 |
|
| 69 |
+
# Analyze sentiment & voice
|
| 70 |
texts = df[column_name].fillna("").astype(str).tolist()
|
| 71 |
results = sentiment_pipeline(texts, truncation=True, max_length=512)
|
|
|
|
| 72 |
df['sentiment_label'] = [r['label'] for r in results]
|
| 73 |
df['sentiment_score'] = [r['score'] for r in results]
|
|
|
|
|
|
|
| 74 |
df['is_passive'] = df[column_name].apply(is_passive)
|
| 75 |
df['voice_label'] = df['is_passive'].apply(lambda x: 'PASSIVE' if x else 'ACTIVE')
|
| 76 |
|
|
|
|
|
|
|
| 77 |
# Get all column names except sentiment/voice columns for filter options
|
| 78 |
filter_columns = [col for col in df.columns if col not in ['sentiment_label', 'sentiment_score', 'is_passive', 'voice_label']]
|
| 79 |
|
|
|
|
| 80 |
if 'file_name' in df.columns:
|
| 81 |
file_summary = "\n\n📁 FILES UPLOADED:\n"
|
| 82 |
for fname in df['file_name'].unique():
|
|
|
|
| 86 |
else:
|
| 87 |
summary = create_summary(df, "All Data")
|
| 88 |
|
| 89 |
+
# Return the DF as the new state value
|
| 90 |
return (summary, df, None, None, None,
|
| 91 |
gr.update(choices=filter_columns, value='file_name' if 'file_name' in filter_columns else None),
|
| 92 |
gr.update(choices=[], value=None),
|
| 93 |
+
gr.update(choices=[], value=None),
|
| 94 |
+
df) # Return DF for the gr.State component
|
| 95 |
|
| 96 |
except Exception as e:
|
|
|
|
| 97 |
traceback.print_exc()
|
| 98 |
+
return f"Error: {str(e)}", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None
|
| 99 |
|
| 100 |
+
# --- Summary Functions ---
|
| 101 |
|
| 102 |
def create_summary(df, title):
|
|
|
|
| 103 |
total_lines = len(df)
|
| 104 |
positive_pct = (df['sentiment_label'].value_counts(normalize=True).get('POSITIVE', 0) * 100)
|
| 105 |
+
passive_pct = (df['is_passive'].mean() * 100)
|
|
|
|
| 106 |
summary = (f"--- Summary for {title} ---\n"
|
| 107 |
f"Total Lines Analyzed: {total_lines}\n"
|
| 108 |
f"Positive Sentiment: {positive_pct:.1f}%\n"
|
|
|
|
| 113 |
return summary
|
| 114 |
|
| 115 |
def create_comparison_summary(df1, df2, label1, label2):
    """Build a text report that compares two groups.

    The report starts with a headline naming both groups, then contains the
    ``create_summary`` output for ``df1`` and for ``df2``, separated by a
    blank line.
    """
    headline = f"📊 COMPARISON SUMMARY: {label1} vs {label2}\n\n"
    first_section = create_summary(df1, label1)
    second_section = create_summary(df2, label2)
    return headline + first_section + "\n\n" + second_section
|
| 120 |
|
| 121 |
+
def get_filter_values(filter_column, current_df_state):
    """Get unique values for the selected filter column using the state DF.

    Args:
        filter_column: Name of the column chosen in the filter dropdown.
        current_df_state: The analyzed DataFrame held in ``gr.State``, or
            None when nothing has been analyzed yet.

    Returns:
        A pair of ``gr.update`` objects, one for each group dropdown. Both
        carry the same choices: up to 100 distinct non-null values of the
        column, converted to strings. Choices are empty when there is no
        DataFrame, no column is selected, or the selected column does not
        exist in the DataFrame.
    """
    # A column name that is absent from the current DataFrame would raise
    # KeyError on lookup; treat it like "nothing selected".
    if (
        current_df_state is None
        or filter_column is None
        or filter_column not in current_df_state.columns
    ):
        return gr.update(choices=[]), gr.update(choices=[])

    # Cap the list before stringifying so at most 100 values are converted.
    distinct = current_df_state[filter_column].dropna().unique().tolist()[:100]
    choices = [str(value) for value in distinct]

    return gr.update(choices=choices, value=None), gr.update(choices=choices, value=None)
|
| 130 |
|
| 131 |
+
def compare_groups(filter_column, group1_value, group2_value, current_df_state):
|
| 132 |
+
"""Compare two groups side by side using the state DF"""
|
| 133 |
+
if current_df_state is None:
|
|
|
|
|
|
|
| 134 |
return "Please analyze sentiment first", None, None, None, None
|
| 135 |
|
| 136 |
if not filter_column or not group1_value or not group2_value:
|
| 137 |
return "Please select a filter column and both group values", None, None, None, None
|
| 138 |
|
| 139 |
+
df = current_df_state.copy()
|
| 140 |
|
|
|
|
| 141 |
df1 = df[df[filter_column].astype(str) == group1_value]
|
| 142 |
df2 = df[df[filter_column].astype(str) == group2_value]
|
| 143 |
|
| 144 |
if len(df1) == 0 or len(df2) == 0:
|
| 145 |
return "One or both groups have no data", None, None, None, None
|
| 146 |
|
|
|
|
| 147 |
fig_pie = create_comparison_pie(df1, df2, group1_value, group2_value)
|
| 148 |
fig_bar = create_comparison_bar(df1, df2, group1_value, group2_value)
|
|
|
|
| 149 |
fig_voice_bar = create_comparison_voice_bar(df1, df2, group1_value, group2_value)
|
| 150 |
|
|
|
|
| 151 |
summary = create_comparison_summary(df1, df2, group1_value, group2_value)
|
| 152 |
|
|
|
|
| 153 |
df1_display = df1.copy()
|
| 154 |
df1_display['comparison_group'] = group1_value
|
| 155 |
df2_display = df2.copy()
|
|
|
|
| 160 |
|
| 161 |
|
| 162 |
def create_comparison_pie(df1, df2, label1, label2):
    """Draw two groups' sentiment label counts as side-by-side pie charts.

    Args:
        df1, df2: DataFrames that contain a ``sentiment_label`` column.
        label1, label2: Group names, used as subplot titles and trace names.

    Returns:
        A one-row, two-column plotly figure with one pie per group.
    """
    fig = make_subplots(
        rows=1,
        cols=2,
        specs=[[{'type': 'pie'}, {'type': 'pie'}]],
        subplot_titles=(f'{label1}', f'{label2}'),
    )

    groups = ((df1, label1), (df2, label2))
    for column, (frame, label) in enumerate(groups, start=1):
        label_counts = frame['sentiment_label'].value_counts()
        # POSITIVE slices use '#10b981'; every other label uses '#ef4444'.
        slice_colors = [
            '#10b981' if sentiment == 'POSITIVE' else '#ef4444'
            for sentiment in label_counts.index
        ]
        fig.add_trace(
            go.Pie(
                labels=label_counts.index,
                values=label_counts.values,
                name=label,
                marker_colors=slice_colors,
                textinfo='percent+label+value',
            ),
            row=1,
            col=column,
        )

    fig.update_layout(title_text='Sentiment Distribution Comparison', height=400)
    return fig
|
| 171 |
|
| 172 |
def create_comparison_bar(df1, df2, label1, label2):
    """Draw grouped bars comparing two groups' sentiment percentages.

    Args:
        df1, df2: DataFrames that contain a ``sentiment_label`` column.
        label1, label2: Group names, used as the bar trace names.

    Returns:
        A plotly figure with one bar trace per group over POSITIVE/NEGATIVE.
    """
    shares = [
        frame['sentiment_label'].value_counts(normalize=True) * 100
        for frame in (df1, df2)
    ]
    sentiments = ['POSITIVE', 'NEGATIVE']
    fig = go.Figure()

    for share, label, color in zip(shares, (label1, label2), ('#3b82f6', '#ef4444')):
        # A sentiment that never occurs in the group is drawn as 0%.
        heights = [share.get(s, 0) for s in sentiments]
        fig.add_trace(go.Bar(
            name=label,
            x=sentiments,
            y=heights,
            marker_color=color,
            text=[f"{h:.1f}%" for h in heights],
            textposition='auto',
        ))

    fig.update_layout(title_text='Sentiment Percentage Comparison', barmode='group', height=400)
    return fig
|
| 182 |
|
|
|
|
| 183 |
def create_comparison_voice_bar(df1, df2, label1, label2):
    """Draw grouped bars comparing two groups' active/passive percentages.

    Args:
        df1, df2: DataFrames that contain a ``voice_label`` column.
        label1, label2: Group names, used as the bar trace names.

    Returns:
        A plotly figure with one bar trace per group over ACTIVE/PASSIVE.
    """
    shares = [
        frame['voice_label'].value_counts(normalize=True) * 100
        for frame in (df1, df2)
    ]
    voices = ['ACTIVE', 'PASSIVE']
    fig = go.Figure()

    for share, label, color in zip(shares, (label1, label2), ('#10b981', '#fbbf24')):
        # A voice label that never occurs in the group is drawn as 0%.
        heights = [share.get(v, 0) for v in voices]
        fig.add_trace(go.Bar(
            name=label,
            x=voices,
            y=heights,
            marker_color=color,
            text=[f"{h:.1f}%" for h in heights],
            textposition='auto',
        ))

    fig.update_layout(title_text='Active vs. Passive Voice Percentage Comparison', barmode='group', height=400)
    return fig
|
| 193 |
|
| 194 |
|
|
|
|
| 196 |
|
| 197 |
with gr.Blocks(title="Sentiment & Voice Analyzer") as demo:
|
| 198 |
gr.Markdown("# Advanced Text Analyzer: Sentiment, Active vs. Passive Voice")
|
| 199 |
+
# Define the state component to persist data across calls
|
| 200 |
+
analyzed_df_state = gr.State(value=None)
|
| 201 |
|
| 202 |
with gr.Tab("Analyze Files"):
|
| 203 |
with gr.Row():
|
|
|
|
| 225 |
comparison_summary_output = gr.Textbox(label="Comparison Summary", lines=15)
|
| 226 |
comparison_dataframe_output = gr.DataFrame(label="Comparison Data Results")
|
| 227 |
|
|
|
|
| 228 |
comparison_pie_chart = gr.Plot(label="Sentiment Distribution Pie Chart")
|
| 229 |
comparison_bar_chart = gr.Plot(label="Sentiment Percentage Bar Chart")
|
| 230 |
comparison_voice_bar_chart = gr.Plot(label="Active/Passive Voice Bar Chart")
|
| 231 |
|
|
|
|
| 232 |
# --- Event Handlers ---
|
| 233 |
|
| 234 |
analyze_button.click(
|
| 235 |
fn=analyze_sentiment_files,
|
| 236 |
inputs=[file_input1, file_input2, file_input3, file_input4, file_input5, csv_column_name],
|
| 237 |
+
outputs=[
|
| 238 |
+
summary_output, dataframe_output, comparison_pie_chart, comparison_bar_chart,
|
| 239 |
+
comparison_voice_bar_chart, filter_col_dropdown, group1_dropdown, group2_dropdown,
|
| 240 |
+
analyzed_df_state # IMPORTANT: Update the State variable with the new DF
|
| 241 |
+
]
|
| 242 |
)
|
| 243 |
|
| 244 |
+
# Pass the state DF to the value-getting function
|
| 245 |
filter_col_dropdown.change(
|
| 246 |
fn=get_filter_values,
|
| 247 |
+
inputs=[filter_col_dropdown, analyzed_df_state],
|
| 248 |
outputs=[group1_dropdown, group2_dropdown]
|
| 249 |
)
|
| 250 |
|
| 251 |
+
# Pass the state DF to the comparison function
|
| 252 |
compare_button.click(
|
| 253 |
fn=compare_groups,
|
| 254 |
+
inputs=[filter_col_dropdown, group1_dropdown, group2_dropdown, analyzed_df_state],
|
| 255 |
outputs=[comparison_summary_output, comparison_dataframe_output, comparison_pie_chart, comparison_bar_chart, comparison_voice_bar_chart]
|
| 256 |
)
|
| 257 |
|