Spaces:

kambris
/

LLMLPSentiment

Sleeping

App Files Community

kambris committed on Dec 11, 2025

Commit

fce0e3a

verified ·

1 Parent(s): d9f8ff2

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -26

app.py CHANGED Viewed

@@ -14,17 +14,57 @@ sentiment_pipeline = pipeline(
 # Store the analyzed dataframe globally
 analyzed_df = None
-def analyze_sentiment(file, column_name):
-    """Analyze sentiment for a specific column in uploaded file"""
     global analyzed_df
     try:
-        df = pd.read_csv(file.name)
-        if column_name not in df.columns:
-            return (f"Error: Column '{column_name}' not found. Available columns: {', '.join(df.columns)}",
                     None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
         texts = df[column_name].fillna("").astype(str).tolist()
         results = sentiment_pipeline(texts, truncation=True, max_length=512)
@@ -263,15 +303,22 @@ Sentiment Percentages:
 # Create Gradio interface
 with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 📊 Sentiment Analysis: Side-by-Side Group Comparison")
-    gr.Markdown("Upload CSV, analyze sentiment, then compare two groups (e.g., Arab vs Chinese, Singular vs Plural)")
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### Step 1: Upload & Analyze")
-            file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
             column_input = gr.Textbox(
-                label="Column to Analyze",
                 placeholder="e.g., 'review_text'",
                 value="text"
             )
@@ -282,21 +329,19 @@ with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as dem
                 label="Compare by Column",
                 choices=[],
                 interactive=True,
-                info="e.g., 'language', 'response_type'"
             )
             with gr.Row():
                 group1_value = gr.Dropdown(
                     label="Group 1",
                     choices=[],
-                    interactive=True,
-                    info="e.g., 'Arab', 'Singular'"
                 )
                 group2_value = gr.Dropdown(
                     label="Group 2",
                     choices=[],
-                    interactive=True,
-                    info="e.g., 'Chinese', 'Plural'"
                 )
             compare_btn = gr.Button("⚖️ Compare Groups", variant="secondary", size="lg")
@@ -314,12 +359,12 @@ with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as dem
             plot_hist = gr.Plot(label="Confidence Score Distribution")
     with gr.Row():
-        output_df = gr.Dataframe(label="Combined Data (Both Groups)", max_height=400)
     # Connect events
     analyze_btn.click(
-        fn=analyze_sentiment,
-        inputs=[file_input, column_input],
         outputs=[summary_output, output_df, plot_pie, plot_bar, plot_hist,
                 filter_column, group1_value, group2_value]
     )
@@ -338,24 +383,38 @@ with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as dem
     gr.Markdown("""
     ### 💡 How to use:
-    1. **Upload** your CSV file with a text column and grouping column(s)
-    2. **Analyze** sentiment on the text column
-    3. **Select comparison column** (e.g., 'language', 'response_type', 'nationality')
-    4. **Choose two groups** to compare (e.g., Arab vs Chinese, Singular vs Plural)
-    5. **View comparisons** across multiple visualizations
     ### 📈 Comparison Features:
     - Side-by-side pie charts showing sentiment distribution
     - Grouped bar chart comparing positive/negative percentages
     - Overlaid histogram comparing confidence score distributions
     - Detailed statistical summary with difference analysis
-    - Combined data table with group labels
     ### 🎯 Example Use Cases:
-    - Compare sentiment: Arab respondents vs Chinese respondents
-    - Analyze: Singular form responses vs Plural form responses
-    - Compare: Male vs Female sentiment patterns
     - Analyze: Product A reviews vs Product B reviews
     """)
 if __name__ == "__main__":

 # Store the analyzed dataframe globally
 analyzed_df = None
+def analyze_sentiment_files(file1, file2, file3, file4, file5, column_name):
+    """Analyze sentiment for multiple TXT files or a single CSV file"""
     global analyzed_df
     try:
+        # Collect all uploaded files
+        files = [f for f in [file1, file2, file3, file4, file5] if f is not None]
+        if not files:
+            return ("Please upload at least one file",
                     None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
+        # Check if we have TXT files or CSV
+        file_paths = [f.name for f in files]
+        if all(path.endswith('.txt') for path in file_paths):
+            # Handle multiple TXT files
+            all_data = []
+            for i, file in enumerate(files, 1):
+                with open(file.name, 'r', encoding='utf-8') as f:
+                    lines = f.readlines()
+                texts = [line.strip() for line in lines if line.strip()]
+                # Create dataframe for this file
+                file_df = pd.DataFrame({
+                    'text': texts,
+                    'line_number': range(1, len(texts) + 1),
+                    'file_name': f'File {i}',
+                    'source_file': file.name.split('/')[-1]
+                })
+                all_data.append(file_df)
+            # Combine all files
+            df = pd.concat(all_data, ignore_index=True)
+            column_name = 'text'
+        elif len(files) == 1 and file_paths[0].endswith('.csv'):
+            # Handle single CSV file
+            df = pd.read_csv(file_paths[0])
+            if column_name not in df.columns:
+                return (f"Error: Column '{column_name}' not found. Available columns: {', '.join(df.columns)}",
+                        None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
+        else:
+            return ("Error: Either upload multiple TXT files OR a single CSV file (not both)",
+                    None, None, None, None, gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]))
+        # Analyze sentiment
         texts = df[column_name].fillna("").astype(str).tolist()
         results = sentiment_pipeline(texts, truncation=True, max_length=512)
 # Create Gradio interface
 with gr.Blocks(title="Sentiment Comparison Tool", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 📊 Sentiment Analysis: Multi-File Comparison")
+    gr.Markdown("Upload 2-5 TXT files to compare OR upload a single CSV file")
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### Step 1: Upload & Analyze")
+            gr.Markdown("**Upload Multiple TXT Files (2-5) OR Single CSV:**")
+            file1 = gr.File(label="File 1 (Required)", file_types=[".csv", ".txt"])
+            file2 = gr.File(label="File 2 (Optional)", file_types=[".txt"])
+            file3 = gr.File(label="File 3 (Optional)", file_types=[".txt"])
+            file4 = gr.File(label="File 4 (Optional)", file_types=[".txt"])
+            file5 = gr.File(label="File 5 (Optional)", file_types=[".txt"])
             column_input = gr.Textbox(
+                label="Column to Analyze (CSV only)",
                 placeholder="e.g., 'review_text'",
                 value="text"
             )
                 label="Compare by Column",
                 choices=[],
                 interactive=True,
+                info="Select 'file_name' to compare TXT files"
             )
             with gr.Row():
                 group1_value = gr.Dropdown(
                     label="Group 1",
                     choices=[],
+                    interactive=True
                 )
                 group2_value = gr.Dropdown(
                     label="Group 2",
                     choices=[],
+                    interactive=True
                 )
             compare_btn = gr.Button("⚖️ Compare Groups", variant="secondary", size="lg")
             plot_hist = gr.Plot(label="Confidence Score Distribution")
     with gr.Row():
+        output_df = gr.Dataframe(label="All Data", max_height=400)
     # Connect events
     analyze_btn.click(
+        fn=analyze_sentiment_files,
+        inputs=[file1, file2, file3, file4, file5, column_input],
         outputs=[summary_output, output_df, plot_pie, plot_bar, plot_hist,
                 filter_column, group1_value, group2_value]
     )
     gr.Markdown("""
     ### 💡 How to use:
+    **Option A: Multiple TXT Files (2-5 files)**
+    1. Upload 2-5 TXT files (one per upload slot)
+    2. Click "Analyze Sentiment" to process all files
+    3. Select "file_name" as the comparison column
+    4. Choose two files to compare (e.g., "File 1" vs "File 2")
+    5. Click "Compare Groups" to see side-by-side comparison
+    **Option B: Single CSV File**
+    1. Upload one CSV file with text column and grouping columns
+    2. Specify which column contains the text to analyze
+    3. Click "Analyze Sentiment"
+    4. Select any column to compare groups (e.g., language, category)
+    5. Choose two values to compare
+    ### 📂 File Format Details:
+    - **TXT files**: Each line is analyzed separately; files are labeled as "File 1", "File 2", etc.
+    - **CSV files**: Specify text column; can compare based on any categorical column
     ### 📈 Comparison Features:
     - Side-by-side pie charts showing sentiment distribution
     - Grouped bar chart comparing positive/negative percentages
     - Overlaid histogram comparing confidence score distributions
     - Detailed statistical summary with difference analysis
+    - Full data table with all analyzed text and sentiment scores
     ### 🎯 Example Use Cases:
+    - Compare sentiment across different text documents
+    - Analyze reviews from different sources
+    - Compare sentiment: Arab responses vs Chinese responses
     - Analyze: Product A reviews vs Product B reviews
+    - Compare: Pre-intervention vs Post-intervention feedback
     """)
 if __name__ == "__main__":