kambris committed on
Commit
0be81c2
·
verified ·
1 Parent(s): 6c8a927

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -149
app.py CHANGED
@@ -5,6 +5,7 @@ import plotly.express as px
5
  import plotly.graph_objects as go
6
  from plotly.subplots import make_subplots
7
  import spacy
 
8
 
9
  # Load the English spaCy model (lightweight, 'sm' for small)
10
  try:
@@ -20,40 +21,29 @@ sentiment_pipeline = pipeline(
20
  model="distilbert-base-uncased-finetuned-sst-2-english"
21
  )
22
 
23
- # Store the analyzed dataframe globally
24
- analyzed_df = None
25
-
26
  # --- Function: Detect Passive Voice using spaCy ---
27
  def is_passive(text):
28
  """Checks if a sentence is passive using spaCy's dependency parser."""
29
  doc = nlp(text)
30
- # A simple heuristic check for passive voice structure
31
- # Look for a form of 'be' (auxpass) followed by a past participle (VERB/VBN)
32
  for token in doc:
33
  if token.dep_ == 'auxpass' and token.head.pos_ == 'VERB' and token.head.tag_ == 'VBN':
34
  return True
35
  return False
36
 
37
-
38
  def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
39
  """Analyze sentiment and active/passive voice for multiple TXT files or a single CSV file"""
40
- global analyzed_df
41
-
42
  try:
43
- # Collect all uploaded files
44
  files = [f for f in [file1, file2, file3, file4, file5] if f is not None]
45
 
46
  if not files:
47
- return ("Please upload at least one file",
48
- None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
49
 
50
- # Check if we have TXT files or CSV
51
  file_paths = [f.name for f in files]
52
 
53
  if all(path.endswith('.txt') for path in file_paths):
54
- # Handle multiple TXT files
55
  all_data = []
56
-
57
  for i, file in enumerate(files, 1):
58
  try:
59
  with open(file.name, 'r', encoding='utf-8') as f:
@@ -61,58 +51,32 @@ def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
61
  except:
62
  with open(file.name, 'r', encoding='latin-1') as f:
63
  lines = f.readlines()
64
-
65
  texts = [line.strip() for line in lines if line.strip()]
66
-
67
- if not texts:
68
- continue
69
-
70
- # Create dataframe for this file
71
- file_df = pd.DataFrame({
72
- 'text': texts,
73
- 'line_number': range(1, len(texts) + 1),
74
- 'file_name': f'File {i}',
75
- 'source_file': file.name.split('/')[-1].split('\\')[-1]
76
- })
77
-
78
  all_data.append(file_df)
79
-
80
  if not all_data:
81
- return ("Error: No valid text found in uploaded files",
82
- None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
83
-
84
- # Combine all files
85
  df = pd.concat(all_data, ignore_index=True)
86
  column_name = 'text'
87
-
88
  elif len(files) == 1 and file_paths[0].endswith('.csv'):
89
- # Handle single CSV file
90
  df = pd.read_csv(file_paths[0])
91
-
92
  if column_name not in df.columns:
93
- return (f"Error: Column '{column_name}' not found. Available columns: {', '.join(df.columns)}",
94
- None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
95
  else:
96
- return ("Error: Either upload multiple TXT files OR a single CSV file (not both)",
97
- None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
98
 
99
- # Analyze sentiment
100
  texts = df[column_name].fillna("").astype(str).tolist()
101
  results = sentiment_pipeline(texts, truncation=True, max_length=512)
102
-
103
  df['sentiment_label'] = [r['label'] for r in results]
104
  df['sentiment_score'] = [r['score'] for r in results]
105
-
106
- # --- New Analysis: Active/Passive Voice ---
107
  df['is_passive'] = df[column_name].apply(is_passive)
108
  df['voice_label'] = df['is_passive'].apply(lambda x: 'PASSIVE' if x else 'ACTIVE')
109
 
110
- analyzed_df = df
111
-
112
  # Get all column names except sentiment/voice columns for filter options
113
  filter_columns = [col for col in df.columns if col not in ['sentiment_label', 'sentiment_score', 'is_passive', 'voice_label']]
114
 
115
- # Create initial summary with file breakdown if multiple TXT files
116
  if 'file_name' in df.columns:
117
  file_summary = "\n\n📁 FILES UPLOADED:\n"
118
  for fname in df['file_name'].unique():
@@ -122,24 +86,23 @@ def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
122
  else:
123
  summary = create_summary(df, "All Data")
124
 
 
125
  return (summary, df, None, None, None,
126
  gr.update(choices=filter_columns, value='file_name' if 'file_name' in filter_columns else None),
127
  gr.update(choices=[], value=None),
128
- gr.update(choices=[], value=None))
 
129
 
130
  except Exception as e:
131
- import traceback
132
  traceback.print_exc()
133
- return f"Error: {str(e)}", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[])
134
 
135
- # --- Summary Functions (Updated to include passive voice) ---
136
 
137
  def create_summary(df, title):
138
- """Generates a summary string including sentiment and voice stats."""
139
  total_lines = len(df)
140
  positive_pct = (df['sentiment_label'].value_counts(normalize=True).get('POSITIVE', 0) * 100)
141
- passive_pct = (df['is_passive'].mean() * 100) # Mean of True/False gives proportion of True
142
-
143
  summary = (f"--- Summary for {title} ---\n"
144
  f"Total Lines Analyzed: {total_lines}\n"
145
  f"Positive Sentiment: {positive_pct:.1f}%\n"
@@ -150,53 +113,43 @@ def create_summary(df, title):
150
  return summary
151
 
152
  def create_comparison_summary(df1, df2, label1, label2):
153
- """Generates a comparison summary string."""
154
  summary = f"📊 COMPARISON SUMMARY: {label1} vs {label2}\n\n"
155
  summary += create_summary(df1, label1) + "\n\n"
156
  summary += create_summary(df2, label2)
157
  return summary
158
 
159
- def get_filter_values(filter_column):
160
- """Get unique values for the selected filter column"""
161
- global analyzed_df
162
-
163
- if analyzed_df is None or filter_column is None:
164
  return gr.update(choices=[]), gr.update(choices=[])
165
 
166
- unique_values = analyzed_df[filter_column].dropna().unique().tolist()
167
  unique_values = [str(v) for v in unique_values][:100]
168
 
169
  return gr.update(choices=unique_values, value=None), gr.update(choices=unique_values, value=None)
170
 
171
- def compare_groups(filter_column, group1_value, group2_value):
172
- """Compare two groups side by side"""
173
- global analyzed_df
174
-
175
- if analyzed_df is None:
176
  return "Please analyze sentiment first", None, None, None, None
177
 
178
  if not filter_column or not group1_value or not group2_value:
179
  return "Please select a filter column and both group values", None, None, None, None
180
 
181
- df = analyzed_df.copy()
182
 
183
- # Filter data for each group
184
  df1 = df[df[filter_column].astype(str) == group1_value]
185
  df2 = df[df[filter_column].astype(str) == group2_value]
186
 
187
  if len(df1) == 0 or len(df2) == 0:
188
  return "One or both groups have no data", None, None, None, None
189
 
190
- # Create comparison visualizations
191
  fig_pie = create_comparison_pie(df1, df2, group1_value, group2_value)
192
  fig_bar = create_comparison_bar(df1, df2, group1_value, group2_value)
193
- # Using the new voice bar chart instead of a generic histogram
194
  fig_voice_bar = create_comparison_voice_bar(df1, df2, group1_value, group2_value)
195
 
196
- # Create comparison summary
197
  summary = create_comparison_summary(df1, df2, group1_value, group2_value)
198
 
199
- # Combine dataframes with group labels
200
  df1_display = df1.copy()
201
  df1_display['comparison_group'] = group1_value
202
  df2_display = df2.copy()
@@ -207,98 +160,35 @@ def compare_groups(filter_column, group1_value, group2_value):
207
 
208
 
209
  def create_comparison_pie(df1, df2, label1, label2):
210
- """Create side-by-side pie charts"""
211
- fig = make_subplots(
212
- rows=1, cols=2,
213
- specs=[[{'type':'pie'}, {'type':'pie'}]],
214
- subplot_titles=(f'{label1}', f'{label2}')
215
- )
216
-
217
- # Group 1
218
  counts1 = df1['sentiment_label'].value_counts()
219
- fig.add_trace(go.Pie(
220
- labels=counts1.index,
221
- values=counts1.values,
222
- name=label1,
223
- marker_colors=['#10b981' if x=='POSITIVE' else '#ef4444' for x in counts1.index],
224
- textinfo='percent+label+value'
225
- ), row=1, col=1)
226
-
227
- # Group 2
228
  counts2 = df2['sentiment_label'].value_counts()
229
- fig.add_trace(go.Pie(
230
- labels=counts2.index,
231
- values=counts2.values,
232
- name=label2,
233
- marker_colors=['#10b981' if x=='POSITIVE' else '#ef4444' for x in counts2.index],
234
- textinfo='percent+label+value'
235
- ), row=1, col=2)
236
-
237
  fig.update_layout(title_text='Sentiment Distribution Comparison', height=400)
238
-
239
  return fig
240
 
241
  def create_comparison_bar(df1, df2, label1, label2):
242
- """Create grouped bar chart comparing sentiment percentages"""
243
  counts1 = df1['sentiment_label'].value_counts(normalize=True) * 100
244
  counts2 = df2['sentiment_label'].value_counts(normalize=True) * 100
245
-
246
  sentiments = ['POSITIVE', 'NEGATIVE']
247
-
248
  fig = go.Figure()
249
-
250
- fig.add_trace(go.Bar(
251
- name=label1,
252
- x=sentiments,
253
- y=[counts1.get(s, 0) for s in sentiments],
254
- marker_color='#3b82f6',
255
- text=[f"{counts1.get(s, 0):.1f}%" for s in sentiments],
256
- textposition='auto'
257
- ))
258
-
259
- fig.add_trace(go.Bar(
260
- name=label2,
261
- x=sentiments,
262
- y=[counts2.get(s, 0) for s in sentiments],
263
- marker_color='#ef4444',
264
- text=[f"{counts2.get(s, 0):.1f}%" for s in sentiments],
265
- textposition='auto'
266
- ))
267
-
268
  fig.update_layout(title_text='Sentiment Percentage Comparison', barmode='group', height=400)
269
-
270
  return fig
271
 
272
- # --- New Function: Create Voice Comparison Bar Chart ---
273
  def create_comparison_voice_bar(df1, df2, label1, label2):
274
- """Create grouped bar chart comparing active vs passive voice percentages"""
275
  counts1 = df1['voice_label'].value_counts(normalize=True) * 100
276
  counts2 = df2['voice_label'].value_counts(normalize=True) * 100
277
-
278
  voices = ['ACTIVE', 'PASSIVE']
279
-
280
  fig = go.Figure()
281
-
282
- fig.add_trace(go.Bar(
283
- name=label1,
284
- x=voices,
285
- y=[counts1.get(s, 0) for s in voices],
286
- marker_color='#10b981',
287
- text=[f"{counts1.get(s, 0):.1f}%" for s in voices],
288
- textposition='auto'
289
- ))
290
-
291
- fig.add_trace(go.Bar(
292
- name=label2,
293
- x=voices,
294
- y=[counts2.get(s, 0) for s in voices],
295
- marker_color='#fbbf24',
296
- text=[f"{counts2.get(s, 0):.1f}%" for s in voices],
297
- textposition='auto'
298
- ))
299
-
300
  fig.update_layout(title_text='Active vs. Passive Voice Percentage Comparison', barmode='group', height=400)
301
-
302
  return fig
303
 
304
 
@@ -306,6 +196,8 @@ def create_comparison_voice_bar(df1, df2, label1, label2):
306
 
307
  with gr.Blocks(title="Sentiment & Voice Analyzer") as demo:
308
  gr.Markdown("# Advanced Text Analyzer: Sentiment, Active vs. Passive Voice")
 
 
309
 
310
  with gr.Tab("Analyze Files"):
311
  with gr.Row():
@@ -333,29 +225,33 @@ with gr.Blocks(title="Sentiment & Voice Analyzer") as demo:
333
  comparison_summary_output = gr.Textbox(label="Comparison Summary", lines=15)
334
  comparison_dataframe_output = gr.DataFrame(label="Comparison Data Results")
335
 
336
- # Updated output slots for the new voice bar chart
337
  comparison_pie_chart = gr.Plot(label="Sentiment Distribution Pie Chart")
338
  comparison_bar_chart = gr.Plot(label="Sentiment Percentage Bar Chart")
339
  comparison_voice_bar_chart = gr.Plot(label="Active/Passive Voice Bar Chart")
340
 
341
-
342
  # --- Event Handlers ---
343
 
344
  analyze_button.click(
345
  fn=analyze_sentiment_files,
346
  inputs=[file_input1, file_input2, file_input3, file_input4, file_input5, csv_column_name],
347
- outputs=[summary_output, dataframe_output, comparison_pie_chart, comparison_bar_chart, comparison_voice_bar_chart, filter_col_dropdown, group1_dropdown, group2_dropdown]
 
 
 
 
348
  )
349
 
 
350
  filter_col_dropdown.change(
351
  fn=get_filter_values,
352
- inputs=[filter_col_dropdown],
353
  outputs=[group1_dropdown, group2_dropdown]
354
  )
355
 
 
356
  compare_button.click(
357
  fn=compare_groups,
358
- inputs=[filter_col_dropdown, group1_dropdown, group2_dropdown],
359
  outputs=[comparison_summary_output, comparison_dataframe_output, comparison_pie_chart, comparison_bar_chart, comparison_voice_bar_chart]
360
  )
361
 
 
5
  import plotly.graph_objects as go
6
  from plotly.subplots import make_subplots
7
  import spacy
8
+ import traceback # Added for better error tracing
9
 
10
  # Load the English spaCy model (lightweight, 'sm' for small)
11
  try:
 
21
  model="distilbert-base-uncased-finetuned-sst-2-english"
22
  )
23
 
 
 
 
24
  # --- Function: Detect Passive Voice using spaCy ---
25
  def is_passive(text):
26
  """Checks if a sentence is passive using spaCy's dependency parser."""
27
  doc = nlp(text)
 
 
28
  for token in doc:
29
  if token.dep_ == 'auxpass' and token.head.pos_ == 'VERB' and token.head.tag_ == 'VBN':
30
  return True
31
  return False
32
 
 
33
  def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
34
  """Analyze sentiment and active/passive voice for multiple TXT files or a single CSV file"""
35
+ # analyzed_df is no longer global, it's returned by this function
36
+
37
  try:
 
38
  files = [f for f in [file1, file2, file3, file4, file5] if f is not None]
39
 
40
  if not files:
41
+ return ("Please upload at least one file", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None)
 
42
 
 
43
  file_paths = [f.name for f in files]
44
 
45
  if all(path.endswith('.txt') for path in file_paths):
 
46
  all_data = []
 
47
  for i, file in enumerate(files, 1):
48
  try:
49
  with open(file.name, 'r', encoding='utf-8') as f:
 
51
  except:
52
  with open(file.name, 'r', encoding='latin-1') as f:
53
  lines = f.readlines()
 
54
  texts = [line.strip() for line in lines if line.strip()]
55
+ if not texts: continue
56
+ file_df = pd.DataFrame({'text': texts, 'line_number': range(1, len(texts) + 1), 'file_name': f'File {i}', 'source_file': file.name.split('/')[-1].split('\\')[-1]})
 
 
 
 
 
 
 
 
 
 
57
  all_data.append(file_df)
 
58
  if not all_data:
59
+ return ("Error: No valid text found in uploaded files", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None)
 
 
 
60
  df = pd.concat(all_data, ignore_index=True)
61
  column_name = 'text'
 
62
  elif len(files) == 1 and file_paths[0].endswith('.csv'):
 
63
  df = pd.read_csv(file_paths[0])
 
64
  if column_name not in df.columns:
65
+ return (f"Error: Column '{column_name}' not found. Available columns: {', '.join(df.columns)}", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None)
 
66
  else:
67
+ return ("Error: Either upload multiple TXT files OR a single CSV file (not both)", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None)
 
68
 
69
+ # Analyze sentiment & voice
70
  texts = df[column_name].fillna("").astype(str).tolist()
71
  results = sentiment_pipeline(texts, truncation=True, max_length=512)
 
72
  df['sentiment_label'] = [r['label'] for r in results]
73
  df['sentiment_score'] = [r['score'] for r in results]
 
 
74
  df['is_passive'] = df[column_name].apply(is_passive)
75
  df['voice_label'] = df['is_passive'].apply(lambda x: 'PASSIVE' if x else 'ACTIVE')
76
 
 
 
77
  # Get all column names except sentiment/voice columns for filter options
78
  filter_columns = [col for col in df.columns if col not in ['sentiment_label', 'sentiment_score', 'is_passive', 'voice_label']]
79
 
 
80
  if 'file_name' in df.columns:
81
  file_summary = "\n\n📁 FILES UPLOADED:\n"
82
  for fname in df['file_name'].unique():
 
86
  else:
87
  summary = create_summary(df, "All Data")
88
 
89
+ # Return the DF as the new state value
90
  return (summary, df, None, None, None,
91
  gr.update(choices=filter_columns, value='file_name' if 'file_name' in filter_columns else None),
92
  gr.update(choices=[], value=None),
93
+ gr.update(choices=[], value=None),
94
+ df) # Return DF for the gr.State component
95
 
96
  except Exception as e:
 
97
  traceback.print_exc()
98
+ return f"Error: {str(e)}", None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), None
99
 
100
+ # --- Summary Functions ---
101
 
102
  def create_summary(df, title):
 
103
  total_lines = len(df)
104
  positive_pct = (df['sentiment_label'].value_counts(normalize=True).get('POSITIVE', 0) * 100)
105
+ passive_pct = (df['is_passive'].mean() * 100)
 
106
  summary = (f"--- Summary for {title} ---\n"
107
  f"Total Lines Analyzed: {total_lines}\n"
108
  f"Positive Sentiment: {positive_pct:.1f}%\n"
 
113
  return summary
114
 
115
  def create_comparison_summary(df1, df2, label1, label2):
 
116
  summary = f"📊 COMPARISON SUMMARY: {label1} vs {label2}\n\n"
117
  summary += create_summary(df1, label1) + "\n\n"
118
  summary += create_summary(df2, label2)
119
  return summary
120
 
121
+ def get_filter_values(filter_column, current_df_state):
122
+ """Get unique values for the selected filter column using the state DF"""
123
+ if current_df_state is None or filter_column is None:
 
 
124
  return gr.update(choices=[]), gr.update(choices=[])
125
 
126
+ unique_values = current_df_state[filter_column].dropna().unique().tolist()
127
  unique_values = [str(v) for v in unique_values][:100]
128
 
129
  return gr.update(choices=unique_values, value=None), gr.update(choices=unique_values, value=None)
130
 
131
+ def compare_groups(filter_column, group1_value, group2_value, current_df_state):
132
+ """Compare two groups side by side using the state DF"""
133
+ if current_df_state is None:
 
 
134
  return "Please analyze sentiment first", None, None, None, None
135
 
136
  if not filter_column or not group1_value or not group2_value:
137
  return "Please select a filter column and both group values", None, None, None, None
138
 
139
+ df = current_df_state.copy()
140
 
 
141
  df1 = df[df[filter_column].astype(str) == group1_value]
142
  df2 = df[df[filter_column].astype(str) == group2_value]
143
 
144
  if len(df1) == 0 or len(df2) == 0:
145
  return "One or both groups have no data", None, None, None, None
146
 
 
147
  fig_pie = create_comparison_pie(df1, df2, group1_value, group2_value)
148
  fig_bar = create_comparison_bar(df1, df2, group1_value, group2_value)
 
149
  fig_voice_bar = create_comparison_voice_bar(df1, df2, group1_value, group2_value)
150
 
 
151
  summary = create_comparison_summary(df1, df2, group1_value, group2_value)
152
 
 
153
  df1_display = df1.copy()
154
  df1_display['comparison_group'] = group1_value
155
  df2_display = df2.copy()
 
160
 
161
 
162
  def create_comparison_pie(df1, df2, label1, label2):
163
+ # Build side-by-side plotly pie charts of sentiment label counts, one per group
164
+ fig = make_subplots(rows=1, cols=2, specs=[[{'type':'pie'}, {'type':'pie'}]], subplot_titles=(f'{label1}', f'{label2}'))
 
 
 
 
 
 
165
  counts1 = df1['sentiment_label'].value_counts()
166
+ fig.add_trace(go.Pie(labels=counts1.index, values=counts1.values, name=label1, marker_colors=['#10b981' if x=='POSITIVE' else '#ef4444' for x in counts1.index], textinfo='percent+label+value'), row=1, col=1)
 
 
 
 
 
 
 
 
167
  counts2 = df2['sentiment_label'].value_counts()
168
+ fig.add_trace(go.Pie(labels=counts2.index, values=counts2.values, name=label2, marker_colors=['#10b981' if x=='POSITIVE' else '#ef4444' for x in counts2.index], textinfo='percent+label+value'), row=1, col=2)
 
 
 
 
 
 
 
169
  fig.update_layout(title_text='Sentiment Distribution Comparison', height=400)
 
170
  return fig
171
 
172
  def create_comparison_bar(df1, df2, label1, label2):
173
+ # Build a grouped plotly bar chart comparing POSITIVE/NEGATIVE sentiment percentages of the two groups
174
  counts1 = df1['sentiment_label'].value_counts(normalize=True) * 100
175
  counts2 = df2['sentiment_label'].value_counts(normalize=True) * 100
 
176
  sentiments = ['POSITIVE', 'NEGATIVE']
 
177
  fig = go.Figure()
178
+ fig.add_trace(go.Bar(name=label1, x=sentiments, y=[counts1.get(s, 0) for s in sentiments], marker_color='#3b82f6', text=[f"{counts1.get(s, 0):.1f}%" for s in sentiments], textposition='auto'))
179
+ fig.add_trace(go.Bar(name=label2, x=sentiments, y=[counts2.get(s, 0) for s in sentiments], marker_color='#ef4444', text=[f"{counts2.get(s, 0):.1f}%" for s in sentiments], textposition='auto'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  fig.update_layout(title_text='Sentiment Percentage Comparison', barmode='group', height=400)
 
181
  return fig
182
 
 
183
  def create_comparison_voice_bar(df1, df2, label1, label2):
184
+ # Build a grouped plotly bar chart comparing ACTIVE/PASSIVE voice percentages of the two groups
185
  counts1 = df1['voice_label'].value_counts(normalize=True) * 100
186
  counts2 = df2['voice_label'].value_counts(normalize=True) * 100
 
187
  voices = ['ACTIVE', 'PASSIVE']
 
188
  fig = go.Figure()
189
+ fig.add_trace(go.Bar(name=label1, x=voices, y=[counts1.get(s, 0) for s in voices], marker_color='#10b981', text=[f"{counts1.get(s, 0):.1f}%" for s in voices], textposition='auto'))
190
+ fig.add_trace(go.Bar(name=label2, x=voices, y=[counts2.get(s, 0) for s in voices], marker_color='#fbbf24', text=[f"{counts2.get(s, 0):.1f}%" for s in voices], textposition='auto'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  fig.update_layout(title_text='Active vs. Passive Voice Percentage Comparison', barmode='group', height=400)
 
192
  return fig
193
 
194
 
 
196
 
197
  with gr.Blocks(title="Sentiment & Voice Analyzer") as demo:
198
  gr.Markdown("# Advanced Text Analyzer: Sentiment, Active vs. Passive Voice")
199
+ # Define the state component to persist data across calls
200
+ analyzed_df_state = gr.State(value=None)
201
 
202
  with gr.Tab("Analyze Files"):
203
  with gr.Row():
 
225
  comparison_summary_output = gr.Textbox(label="Comparison Summary", lines=15)
226
  comparison_dataframe_output = gr.DataFrame(label="Comparison Data Results")
227
 
 
228
  comparison_pie_chart = gr.Plot(label="Sentiment Distribution Pie Chart")
229
  comparison_bar_chart = gr.Plot(label="Sentiment Percentage Bar Chart")
230
  comparison_voice_bar_chart = gr.Plot(label="Active/Passive Voice Bar Chart")
231
 
 
232
  # --- Event Handlers ---
233
 
234
  analyze_button.click(
235
  fn=analyze_sentiment_files,
236
  inputs=[file_input1, file_input2, file_input3, file_input4, file_input5, csv_column_name],
237
+ outputs=[
238
+ summary_output, dataframe_output, comparison_pie_chart, comparison_bar_chart,
239
+ comparison_voice_bar_chart, filter_col_dropdown, group1_dropdown, group2_dropdown,
240
+ analyzed_df_state # IMPORTANT: Update the State variable with the new DF
241
+ ]
242
  )
243
 
244
+ # Pass the state DF to the value-getting function
245
  filter_col_dropdown.change(
246
  fn=get_filter_values,
247
+ inputs=[filter_col_dropdown, analyzed_df_state],
248
  outputs=[group1_dropdown, group2_dropdown]
249
  )
250
 
251
+ # Pass the state DF to the comparison function
252
  compare_button.click(
253
  fn=compare_groups,
254
+ inputs=[filter_col_dropdown, group1_dropdown, group2_dropdown, analyzed_df_state],
255
  outputs=[comparison_summary_output, comparison_dataframe_output, comparison_pie_chart, comparison_bar_chart, comparison_voice_bar_chart]
256
  )
257