Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,17 +14,57 @@ sentiment_pipeline = pipeline(
|
|
| 14 |
# Store the analyzed dataframe globally
|
| 15 |
analyzed_df = None
|
| 16 |
|
| 17 |
-
def
|
| 18 |
-
"""Analyze sentiment for
|
| 19 |
global analyzed_df
|
| 20 |
|
| 21 |
try:
|
| 22 |
-
|
|
|
|
| 23 |
|
| 24 |
-
if
|
| 25 |
-
return (
|
| 26 |
None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
texts = df[column_name].fillna("").astype(str).tolist()
|
| 29 |
results = sentiment_pipeline(texts, truncation=True, max_length=512)
|
| 30 |
|
|
@@ -263,15 +303,22 @@ Sentiment Percentages:
|
|
| 263 |
|
| 264 |
# Create Gradio interface
|
| 265 |
with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as demo:
|
| 266 |
-
gr.Markdown("# π Sentiment Analysis:
|
| 267 |
-
gr.Markdown("Upload
|
| 268 |
|
| 269 |
with gr.Row():
|
| 270 |
with gr.Column(scale=1):
|
| 271 |
gr.Markdown("### Step 1: Upload & Analyze")
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
column_input = gr.Textbox(
|
| 274 |
-
label="Column to Analyze",
|
| 275 |
placeholder="e.g., 'review_text'",
|
| 276 |
value="text"
|
| 277 |
)
|
|
@@ -282,21 +329,19 @@ with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as dem
|
|
| 282 |
label="Compare by Column",
|
| 283 |
choices=[],
|
| 284 |
interactive=True,
|
| 285 |
-
info="
|
| 286 |
)
|
| 287 |
|
| 288 |
with gr.Row():
|
| 289 |
group1_value = gr.Dropdown(
|
| 290 |
label="Group 1",
|
| 291 |
choices=[],
|
| 292 |
-
interactive=True
|
| 293 |
-
info="e.g., 'Arab', 'Singular'"
|
| 294 |
)
|
| 295 |
group2_value = gr.Dropdown(
|
| 296 |
label="Group 2",
|
| 297 |
choices=[],
|
| 298 |
-
interactive=True
|
| 299 |
-
info="e.g., 'Chinese', 'Plural'"
|
| 300 |
)
|
| 301 |
|
| 302 |
compare_btn = gr.Button("βοΈ Compare Groups", variant="secondary", size="lg")
|
|
@@ -314,12 +359,12 @@ with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as dem
|
|
| 314 |
plot_hist = gr.Plot(label="Confidence Score Distribution")
|
| 315 |
|
| 316 |
with gr.Row():
|
| 317 |
-
output_df = gr.Dataframe(label="
|
| 318 |
|
| 319 |
# Connect events
|
| 320 |
analyze_btn.click(
|
| 321 |
-
fn=
|
| 322 |
-
inputs=[
|
| 323 |
outputs=[summary_output, output_df, plot_pie, plot_bar, plot_hist,
|
| 324 |
filter_column, group1_value, group2_value]
|
| 325 |
)
|
|
@@ -338,24 +383,38 @@ with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as dem
|
|
| 338 |
|
| 339 |
gr.Markdown("""
|
| 340 |
### 💡 How to use:
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
|
| 347 |
### π Comparison Features:
|
| 348 |
- Side-by-side pie charts showing sentiment distribution
|
| 349 |
- Grouped bar chart comparing positive/negative percentages
|
| 350 |
- Overlaid histogram comparing confidence score distributions
|
| 351 |
- Detailed statistical summary with difference analysis
|
| 352 |
-
-
|
| 353 |
|
| 354 |
### 🎯 Example Use Cases:
|
| 355 |
-
- Compare sentiment
|
| 356 |
-
- Analyze
|
| 357 |
-
- Compare:
|
| 358 |
- Analyze: Product A reviews vs Product B reviews
|
|
|
|
| 359 |
""")
|
| 360 |
|
| 361 |
if __name__ == "__main__":
|
|
|
|
| 14 |
# Store the analyzed dataframe globally
|
| 15 |
analyzed_df = None
|
| 16 |
|
| 17 |
+
def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
|
| 18 |
+
"""Analyze sentiment for multiple TXT files or a single CSV file"""
|
| 19 |
global analyzed_df
|
| 20 |
|
| 21 |
try:
|
| 22 |
+
# Collect all uploaded files
|
| 23 |
+
files = [f for f in [file1, file2, file3, file4, file5] if f is not None]
|
| 24 |
|
| 25 |
+
if not files:
|
| 26 |
+
return ("Please upload at least one file",
|
| 27 |
None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
|
| 28 |
|
| 29 |
+
# Check if we have TXT files or CSV
|
| 30 |
+
file_paths = [f.name for f in files]
|
| 31 |
+
|
| 32 |
+
if all(path.endswith('.txt') for path in file_paths):
|
| 33 |
+
# Handle multiple TXT files
|
| 34 |
+
all_data = []
|
| 35 |
+
|
| 36 |
+
for i, file in enumerate(files, 1):
|
| 37 |
+
with open(file.name, 'r', encoding='utf-8') as f:
|
| 38 |
+
lines = f.readlines()
|
| 39 |
+
|
| 40 |
+
texts = [line.strip() for line in lines if line.strip()]
|
| 41 |
+
|
| 42 |
+
# Create dataframe for this file
|
| 43 |
+
file_df = pd.DataFrame({
|
| 44 |
+
'text': texts,
|
| 45 |
+
'line_number': range(1, len(texts) + 1),
|
| 46 |
+
'file_name': f'File {i}',
|
| 47 |
+
'source_file': file.name.split('/')[-1]
|
| 48 |
+
})
|
| 49 |
+
|
| 50 |
+
all_data.append(file_df)
|
| 51 |
+
|
| 52 |
+
# Combine all files
|
| 53 |
+
df = pd.concat(all_data, ignore_index=True)
|
| 54 |
+
column_name = 'text'
|
| 55 |
+
|
| 56 |
+
elif len(files) == 1 and file_paths[0].endswith('.csv'):
|
| 57 |
+
# Handle single CSV file
|
| 58 |
+
df = pd.read_csv(file_paths[0])
|
| 59 |
+
|
| 60 |
+
if column_name not in df.columns:
|
| 61 |
+
return (f"Error: Column '{column_name}' not found. Available columns: {', '.join(df.columns)}",
|
| 62 |
+
None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
|
| 63 |
+
else:
|
| 64 |
+
return ("Error: Either upload multiple TXT files OR a single CSV file (not both)",
|
| 65 |
+
None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
|
| 66 |
+
|
| 67 |
+
# Analyze sentiment
|
| 68 |
texts = df[column_name].fillna("").astype(str).tolist()
|
| 69 |
results = sentiment_pipeline(texts, truncation=True, max_length=512)
|
| 70 |
|
|
|
|
| 303 |
|
| 304 |
# Create Gradio interface
|
| 305 |
with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as demo:
|
| 306 |
+
gr.Markdown("# π Sentiment Analysis: Multi-File Comparison")
|
| 307 |
+
gr.Markdown("Upload 2-5 TXT files to compare OR upload a single CSV file")
|
| 308 |
|
| 309 |
with gr.Row():
|
| 310 |
with gr.Column(scale=1):
|
| 311 |
gr.Markdown("### Step 1: Upload & Analyze")
|
| 312 |
+
gr.Markdown("**Upload Multiple TXT Files (2-5) OR Single CSV:**")
|
| 313 |
+
|
| 314 |
+
file1 = gr.File(label="File 1 (Required)", file_types=[".csv", ".txt"])
|
| 315 |
+
file2 = gr.File(label="File 2 (Optional)", file_types=[".txt"])
|
| 316 |
+
file3 = gr.File(label="File 3 (Optional)", file_types=[".txt"])
|
| 317 |
+
file4 = gr.File(label="File 4 (Optional)", file_types=[".txt"])
|
| 318 |
+
file5 = gr.File(label="File 5 (Optional)", file_types=[".txt"])
|
| 319 |
+
|
| 320 |
column_input = gr.Textbox(
|
| 321 |
+
label="Column to Analyze (CSV only)",
|
| 322 |
placeholder="e.g., 'review_text'",
|
| 323 |
value="text"
|
| 324 |
)
|
|
|
|
| 329 |
label="Compare by Column",
|
| 330 |
choices=[],
|
| 331 |
interactive=True,
|
| 332 |
+
info="Select 'file_name' to compare TXT files"
|
| 333 |
)
|
| 334 |
|
| 335 |
with gr.Row():
|
| 336 |
group1_value = gr.Dropdown(
|
| 337 |
label="Group 1",
|
| 338 |
choices=[],
|
| 339 |
+
interactive=True
|
|
|
|
| 340 |
)
|
| 341 |
group2_value = gr.Dropdown(
|
| 342 |
label="Group 2",
|
| 343 |
choices=[],
|
| 344 |
+
interactive=True
|
|
|
|
| 345 |
)
|
| 346 |
|
| 347 |
compare_btn = gr.Button("βοΈ Compare Groups", variant="secondary", size="lg")
|
|
|
|
| 359 |
plot_hist = gr.Plot(label="Confidence Score Distribution")
|
| 360 |
|
| 361 |
with gr.Row():
|
| 362 |
+
output_df = gr.Dataframe(label="All Data", max_height=400)
|
| 363 |
|
| 364 |
# Connect events
|
| 365 |
analyze_btn.click(
|
| 366 |
+
fn=analyze_sentiment_files,
|
| 367 |
+
inputs=[file1, file2, file3, file4, file5, column_input],
|
| 368 |
outputs=[summary_output, output_df, plot_pie, plot_bar, plot_hist,
|
| 369 |
filter_column, group1_value, group2_value]
|
| 370 |
)
|
|
|
|
| 383 |
|
| 384 |
gr.Markdown("""
|
| 385 |
### 💡 How to use:
|
| 386 |
+
|
| 387 |
+
**Option A: Multiple TXT Files (2-5 files)**
|
| 388 |
+
1. Upload 2-5 TXT files (one per upload slot)
|
| 389 |
+
2. Click "Analyze Sentiment" to process all files
|
| 390 |
+
3. Select "file_name" as the comparison column
|
| 391 |
+
4. Choose two files to compare (e.g., "File 1" vs "File 2")
|
| 392 |
+
5. Click "Compare Groups" to see side-by-side comparison
|
| 393 |
+
|
| 394 |
+
**Option B: Single CSV File**
|
| 395 |
+
1. Upload one CSV file with text column and grouping columns
|
| 396 |
+
2. Specify which column contains the text to analyze
|
| 397 |
+
3. Click "Analyze Sentiment"
|
| 398 |
+
4. Select any column to compare groups (e.g., language, category)
|
| 399 |
+
5. Choose two values to compare
|
| 400 |
+
|
| 401 |
+
### π File Format Details:
|
| 402 |
+
- **TXT files**: Each line is analyzed separately; files are labeled as "File 1", "File 2", etc.
|
| 403 |
+
- **CSV files**: Specify text column; can compare based on any categorical column
|
| 404 |
|
| 405 |
### π Comparison Features:
|
| 406 |
- Side-by-side pie charts showing sentiment distribution
|
| 407 |
- Grouped bar chart comparing positive/negative percentages
|
| 408 |
- Overlaid histogram comparing confidence score distributions
|
| 409 |
- Detailed statistical summary with difference analysis
|
| 410 |
+
- Full data table with all analyzed text and sentiment scores
|
| 411 |
|
| 412 |
### 🎯 Example Use Cases:
|
| 413 |
+
- Compare sentiment across different text documents
|
| 414 |
+
- Analyze reviews from different sources
|
| 415 |
+
- Compare sentiment: Arab responses vs Chinese responses
|
| 416 |
- Analyze: Product A reviews vs Product B reviews
|
| 417 |
+
- Compare: Pre-intervention vs Post-intervention feedback
|
| 418 |
""")
|
| 419 |
|
| 420 |
if __name__ == "__main__":
|