kambris committed on
Commit
fce0e3a
·
verified ·
1 Parent(s): d9f8ff2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -26
app.py CHANGED
@@ -14,17 +14,57 @@ sentiment_pipeline = pipeline(
14
  # Store the analyzed dataframe globally
15
  analyzed_df = None
16
 
17
- def analyze_sentiment(file, column_name):
18
- """Analyze sentiment for a specific column in uploaded file"""
19
  global analyzed_df
20
 
21
  try:
22
- df = pd.read_csv(file.name)
 
23
 
24
- if column_name not in df.columns:
25
- return (f"Error: Column '{column_name}' not found. Available columns: {', '.join(df.columns)}",
26
  None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  texts = df[column_name].fillna("").astype(str).tolist()
29
  results = sentiment_pipeline(texts, truncation=True, max_length=512)
30
 
@@ -263,15 +303,22 @@ Sentiment Percentages:
263
 
264
  # Create Gradio interface
265
  with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as demo:
266
- gr.Markdown("# 📊 Sentiment Analysis: Side-by-Side Group Comparison")
267
- gr.Markdown("Upload CSV, analyze sentiment, then compare two groups (e.g., Arab vs Chinese, Singular vs Plural)")
268
 
269
  with gr.Row():
270
  with gr.Column(scale=1):
271
  gr.Markdown("### Step 1: Upload & Analyze")
272
- file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
 
 
 
 
 
 
 
273
  column_input = gr.Textbox(
274
- label="Column to Analyze",
275
  placeholder="e.g., 'review_text'",
276
  value="text"
277
  )
@@ -282,21 +329,19 @@ with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as dem
282
  label="Compare by Column",
283
  choices=[],
284
  interactive=True,
285
- info="e.g., 'language', 'response_type'"
286
  )
287
 
288
  with gr.Row():
289
  group1_value = gr.Dropdown(
290
  label="Group 1",
291
  choices=[],
292
- interactive=True,
293
- info="e.g., 'Arab', 'Singular'"
294
  )
295
  group2_value = gr.Dropdown(
296
  label="Group 2",
297
  choices=[],
298
- interactive=True,
299
- info="e.g., 'Chinese', 'Plural'"
300
  )
301
 
302
  compare_btn = gr.Button("⚖️ Compare Groups", variant="secondary", size="lg")
@@ -314,12 +359,12 @@ with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as dem
314
  plot_hist = gr.Plot(label="Confidence Score Distribution")
315
 
316
  with gr.Row():
317
- output_df = gr.Dataframe(label="Combined Data (Both Groups)", max_height=400)
318
 
319
  # Connect events
320
  analyze_btn.click(
321
- fn=analyze_sentiment,
322
- inputs=[file_input, column_input],
323
  outputs=[summary_output, output_df, plot_pie, plot_bar, plot_hist,
324
  filter_column, group1_value, group2_value]
325
  )
@@ -338,24 +383,38 @@ with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as dem
338
 
339
  gr.Markdown("""
340
  ### 💡 How to use:
341
- 1. **Upload** your CSV file with a text column and grouping column(s)
342
- 2. **Analyze** sentiment on the text column
343
- 3. **Select comparison column** (e.g., 'language', 'response_type', 'nationality')
344
- 4. **Choose two groups** to compare (e.g., Arab vs Chinese, Singular vs Plural)
345
- 5. **View comparisons** across multiple visualizations
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
347
  ### 📈 Comparison Features:
348
  - Side-by-side pie charts showing sentiment distribution
349
  - Grouped bar chart comparing positive/negative percentages
350
  - Overlaid histogram comparing confidence score distributions
351
  - Detailed statistical summary with difference analysis
352
- - Combined data table with group labels
353
 
354
  ### 🎯 Example Use Cases:
355
- - Compare sentiment: Arab respondents vs Chinese respondents
356
- - Analyze: Singular form responses vs Plural form responses
357
- - Compare: Male vs Female sentiment patterns
358
  - Analyze: Product A reviews vs Product B reviews
 
359
  """)
360
 
361
  if __name__ == "__main__":
 
14
  # Store the analyzed dataframe globally
15
  analyzed_df = None
16
 
17
+ def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
18
+ """Analyze sentiment for multiple TXT files or a single CSV file"""
19
  global analyzed_df
20
 
21
  try:
22
+ # Collect all uploaded files
23
+ files = [f for f in [file1, file2, file3, file4, file5] if f is not None]
24
 
25
+ if not files:
26
+ return ("Please upload at least one file",
27
  None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
28
 
29
+ # Check if we have TXT files or CSV
30
+ file_paths = [f.name for f in files]
31
+
32
+ if all(path.endswith('.txt') for path in file_paths):
33
+ # Handle multiple TXT files
34
+ all_data = []
35
+
36
+ for i, file in enumerate(files, 1):
37
+ with open(file.name, 'r', encoding='utf-8') as f:
38
+ lines = f.readlines()
39
+
40
+ texts = [line.strip() for line in lines if line.strip()]
41
+
42
+ # Create dataframe for this file
43
+ file_df = pd.DataFrame({
44
+ 'text': texts,
45
+ 'line_number': range(1, len(texts) + 1),
46
+ 'file_name': f'File {i}',
47
+ 'source_file': file.name.split('/')[-1]
48
+ })
49
+
50
+ all_data.append(file_df)
51
+
52
+ # Combine all files
53
+ df = pd.concat(all_data, ignore_index=True)
54
+ column_name = 'text'
55
+
56
+ elif len(files) == 1 and file_paths[0].endswith('.csv'):
57
+ # Handle single CSV file
58
+ df = pd.read_csv(file_paths[0])
59
+
60
+ if column_name not in df.columns:
61
+ return (f"Error: Column '{column_name}' not found. Available columns: {', '.join(df.columns)}",
62
+ None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
63
+ else:
64
+ return ("Error: Either upload multiple TXT files OR a single CSV file (not both)",
65
+ None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
66
+
67
+ # Analyze sentiment
68
  texts = df[column_name].fillna("").astype(str).tolist()
69
  results = sentiment_pipeline(texts, truncation=True, max_length=512)
70
 
 
303
 
304
  # Create Gradio interface
305
  with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as demo:
306
+ gr.Markdown("# 📊 Sentiment Analysis: Multi-File Comparison")
307
+ gr.Markdown("Upload 2-5 TXT files to compare OR upload a single CSV file")
308
 
309
  with gr.Row():
310
  with gr.Column(scale=1):
311
  gr.Markdown("### Step 1: Upload & Analyze")
312
+ gr.Markdown("**Upload Multiple TXT Files (2-5) OR Single CSV:**")
313
+
314
+ file1 = gr.File(label="File 1 (Required)", file_types=[".csv", ".txt"])
315
+ file2 = gr.File(label="File 2 (Optional)", file_types=[".txt"])
316
+ file3 = gr.File(label="File 3 (Optional)", file_types=[".txt"])
317
+ file4 = gr.File(label="File 4 (Optional)", file_types=[".txt"])
318
+ file5 = gr.File(label="File 5 (Optional)", file_types=[".txt"])
319
+
320
  column_input = gr.Textbox(
321
+ label="Column to Analyze (CSV only)",
322
  placeholder="e.g., 'review_text'",
323
  value="text"
324
  )
 
329
  label="Compare by Column",
330
  choices=[],
331
  interactive=True,
332
+ info="Select 'file_name' to compare TXT files"
333
  )
334
 
335
  with gr.Row():
336
  group1_value = gr.Dropdown(
337
  label="Group 1",
338
  choices=[],
339
+ interactive=True
 
340
  )
341
  group2_value = gr.Dropdown(
342
  label="Group 2",
343
  choices=[],
344
+ interactive=True
 
345
  )
346
 
347
  compare_btn = gr.Button("⚖️ Compare Groups", variant="secondary", size="lg")
 
359
  plot_hist = gr.Plot(label="Confidence Score Distribution")
360
 
361
  with gr.Row():
362
+ output_df = gr.Dataframe(label="All Data", max_height=400)
363
 
364
  # Connect events
365
  analyze_btn.click(
366
+ fn=analyze_sentiment_files,
367
+ inputs=[file1, file2, file3, file4, file5, column_input],
368
  outputs=[summary_output, output_df, plot_pie, plot_bar, plot_hist,
369
  filter_column, group1_value, group2_value]
370
  )
 
383
 
384
  gr.Markdown("""
385
  ### 💡 How to use:
386
+
387
+ **Option A: Multiple TXT Files (2-5 files)**
388
+ 1. Upload 2-5 TXT files (one per upload slot)
389
+ 2. Click "Analyze Sentiment" to process all files
390
+ 3. Select "file_name" as the comparison column
391
+ 4. Choose two files to compare (e.g., "File 1" vs "File 2")
392
+ 5. Click "Compare Groups" to see side-by-side comparison
393
+
394
+ **Option B: Single CSV File**
395
+ 1. Upload one CSV file with text column and grouping columns
396
+ 2. Specify which column contains the text to analyze
397
+ 3. Click "Analyze Sentiment"
398
+ 4. Select any column to compare groups (e.g., language, category)
399
+ 5. Choose two values to compare
400
+
401
+ ### 📂 File Format Details:
402
+ - **TXT files**: Each line is analyzed separately; files are labeled as "File 1", "File 2", etc.
403
+ - **CSV files**: Specify text column; can compare based on any categorical column
404
 
405
  ### 📈 Comparison Features:
406
  - Side-by-side pie charts showing sentiment distribution
407
  - Grouped bar chart comparing positive/negative percentages
408
  - Overlaid histogram comparing confidence score distributions
409
  - Detailed statistical summary with difference analysis
410
+ - Full data table with all analyzed text and sentiment scores
411
 
412
  ### 🎯 Example Use Cases:
413
+ - Compare sentiment across different text documents
414
+ - Analyze reviews from different sources
415
+ - Compare sentiment: Arab responses vs Chinese responses
416
  - Analyze: Product A reviews vs Product B reviews
417
+ - Compare: Pre-intervention vs Post-intervention feedback
418
  """)
419
 
420
  if __name__ == "__main__":