Spaces:

shresht8
/

sentiment-analysis-excel

Running

App Files Files Community

shresht8 committed on Jan 12, 2025

Commit

e0b4a17

verified ·

1 Parent(s): 71e3164

new new graphs and summaries

Browse files

Files changed (1) hide show

app.py +183 -70

app.py CHANGED Viewed

@@ -49,59 +49,106 @@ def process_single_sheet(df, product_name):
 def create_comparison_charts(sentiment_results):
     """
-    Create comparison charts for different products
-    Returns two plotly figures: bar chart and pie chart
     """
     # Prepare data for plotting
-    products = []
-    sentiments = []
-    counts = []
     for product, sentiment_counts in sentiment_results.items():
-        for sentiment, count in sentiment_counts.items():
-            products.append(product)
-            sentiments.append(sentiment)
-            counts.append(count)
-    plot_df = pd.DataFrame({
-        'Product': products,
-        'Sentiment': sentiments,
-        'Count': counts
     })
-    # Create stacked bar chart
-    bar_fig = px.bar(plot_df,
-                     x='Product',
-                     y='Count',
-                     color='Sentiment',
-                     title='Sentiment Distribution by Product',
-                     labels={'Count': 'Number of Reviews'},
-                     color_discrete_sequence=px.colors.qualitative.Set3)
-    # Create pie chart for overall sentiment distribution
-    pie_fig = px.pie(plot_df,
-                     values='Count',
-                     names='Sentiment',
-                     title='Overall Sentiment Distribution',
-                     color_discrete_sequence=px.colors.qualitative.Set3)
-    # Create summary table
-    summary_df = plot_df.pivot_table(
-        values='Count',
-        index='Product',
-        columns='Sentiment',
-        fill_value=0
-    ).round(2)
-    # Add total reviews column
-    summary_df['Total Reviews'] = summary_df.sum(axis=1)
-    # Calculate percentage of positive reviews (Positive + Very Positive)
-    positive_cols = ['Positive', 'Very Positive']
-    positive_cols = [col for col in positive_cols if col in summary_df.columns]
-    summary_df['Positive Ratio'] = (summary_df[positive_cols].sum(axis=1) / summary_df['Total Reviews'] * 100).round(2)
-    return bar_fig, pie_fig, summary_df
 def process_file(file_obj):
@@ -114,7 +161,6 @@ def process_file(file_obj):
         all_processed_dfs = {}
         if file_path.endswith('.csv'):
-            # Process single CSV file
             df = pd.read_csv(file_path)
             product_name = "Product"  # Default name for CSV
             processed_df, sentiment_counts = process_single_sheet(df, product_name)
@@ -122,9 +168,7 @@ def process_file(file_obj):
             sentiment_results[product_name] = sentiment_counts
         elif file_path.endswith(('.xlsx', '.xls')):
-            # Process multiple sheets in Excel file
             excel_file = pd.ExcelFile(file_path)
             for sheet_name in excel_file.sheet_names:
                 df = pd.read_excel(file_path, sheet_name=sheet_name)
                 processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
@@ -134,21 +178,17 @@ def process_file(file_obj):
             raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
         # Create visualizations
-        bar_chart, pie_chart, summary_table = create_comparison_charts(sentiment_results)
-        # Save results to a new Excel file
         output_path = "sentiment_analysis_results.xlsx"
         with pd.ExcelWriter(output_path) as writer:
-            # Save processed data
             for sheet_name, df in all_processed_dfs.items():
                 df.to_excel(writer, sheet_name=sheet_name, index=False)
-            # Save summary
-            summary_table.to_excel(writer, sheet_name='Summary', index=True)
         return (
-            bar_chart,
-            pie_chart,
             summary_table,
             output_path
         )
@@ -158,12 +198,86 @@ def process_file(file_obj):
 # Create Gradio interface
 with gr.Blocks() as interface:
-    gr.Markdown("# Multi-Product Review Sentiment Analysis")
     gr.Markdown("""
-    Upload a file to analyze sentiments:
-    - For CSV: Single product reviews with 'Reviews' column
-    - For Excel: Multiple sheets, each named after the product, with 'Reviews' column
     """)
     with gr.Row():
@@ -176,20 +290,19 @@ with gr.Blocks() as interface:
         analyze_btn = gr.Button("Analyze Sentiments")
     with gr.Row():
-        bar_plot = gr.Plot(label="Sentiment Distribution by Product")
-        pie_plot = gr.Plot(label="Overall Sentiment Distribution")
     with gr.Row():
-        summary_table = gr.Dataframe(label="Summary Statistics")
     with gr.Row():
-        output_file = gr.File(label="Download Detailed Results")
     analyze_btn.click(
         fn=process_file,
         inputs=[file_input],
-        outputs=[bar_plot, pie_plot, summary_table, output_file]
     )
-# Launch the interface
-interface.launch()

 def create_comparison_charts(sentiment_results):
     """
     Create investment-focused comparison charts for different products.

     NOTE(review): another function with this same name is defined further
     down in this file and returns two values; being defined later, it is
     the one bound to the name by the time process_file is called.

     Args:
         sentiment_results: Mapping of product name to that product's
             sentiment counts. Each value must provide ``to_dict()``
             returning ``{sentiment label: count}`` (presumably the pandas
             Series built by process_single_sheet -- confirm there).

     Returns:
         A ``(score_fig, ratio_fig, summary_df)`` tuple: a bar chart of the
         weighted 0-100 sentiment score per product, a grouped bar chart of
         positive vs negative percentages per product, and a summary
         DataFrame sorted by sentiment score in descending order.
     """
     # Weight of each sentiment label in the 0-100 score; the keys double as
     # the canonical list of sentiment columns.
     sentiment_weights = {
         'Very Negative': 0,
         'Negative': 25,
         'Neutral': 50,
         'Positive': 75,
         'Very Positive': 100
     }
     sentiment_labels = list(sentiment_weights)
     # Prepare data for plotting: one row of percentages per product
     plot_data = []
     for product, sentiment_counts in sentiment_results.items():
         sentiment_dict = sentiment_counts.to_dict()
         total = sum(sentiment_dict.values())
         row = {
             'Product': product,
             'Total Reviews': total
         }
         for sentiment, count in sentiment_dict.items():
             # A product with no counted reviews gets 0% instead of raising
             row[sentiment] = (count / total) * 100 if total else 0.0
         plot_data.append(row)
     # reindex guarantees every expected column exists, even for empty input.
     # fillna covers a sentiment that some products have and others lack:
     # that cell would otherwise be NaN and turn the whole score into NaN.
     df = pd.DataFrame(plot_data).reindex(
         columns=['Product', 'Total Reviews'] + sentiment_labels
     )
     df[sentiment_labels] = df[sentiment_labels].astype(float).fillna(0)
     # Weighted sentiment score (0 to 100), computed for all rows at once so
     # duplicate product names cannot pick up each other's values
     df['Sentiment Score'] = sum(
         df[sentiment] * weight / 100
         for sentiment, weight in sentiment_weights.items()
     ).round(2)
     # Create sentiment score chart
     score_fig = go.Figure()
     score_fig.add_trace(go.Bar(
         x=df['Product'],
         y=df['Sentiment Score'],
         text=df['Sentiment Score'].round(1),
         textposition='auto',
         marker_color='rgb(65, 105, 225)'
     ))
     score_fig.update_layout(
         title='Overall Sentiment Score by Product (0-100)',
         yaxis_title='Weighted Sentiment Score',
         yaxis_range=[0, 100],
         showlegend=False
     )
     # Aggregate the two positive and the two negative classes
     df['Positive Ratio'] = df[['Positive', 'Very Positive']].sum(axis=1)
     df['Negative Ratio'] = df[['Negative', 'Very Negative']].sum(axis=1)
     # Create Positive-Negative ratio chart
     ratio_fig = go.Figure()
     ratio_fig.add_trace(go.Bar(
         name='Positive',
         x=df['Product'],
         y=df['Positive Ratio'],
         marker_color='rgb(50, 205, 50)'
     ))
     ratio_fig.add_trace(go.Bar(
         name='Negative',
         x=df['Product'],
         y=df['Negative Ratio'],
         marker_color='rgb(220, 20, 60)'
     ))
     ratio_fig.update_layout(
         barmode='group',
         title='Positive vs Negative Sentiment Ratio by Product',
         yaxis_title='Percentage (%)'
     )
     # Create summary table with investment-relevant metrics
     summary_df = pd.DataFrame({
         'Product': df['Product'],
         'Total Reviews': df['Total Reviews'],
         'Sentiment Score (0-100)': df['Sentiment Score'],
         'Positive Ratio (%)': df['Positive Ratio'].round(2),
         'Negative Ratio (%)': df['Negative Ratio'].round(2),
         'Neutral Ratio (%)': df['Neutral'].round(2)
     })
     # Confidence Score = (positive + negative) / neutral. A zero neutral share
     # is swapped for 0.001 so the division is defined; such rows therefore
     # get a very large score.
     summary_df['Confidence Score'] = ((summary_df['Positive Ratio (%)'] + summary_df['Negative Ratio (%)']) /
                                       summary_df['Neutral Ratio (%)'].replace(0, 0.001)).round(2)
     # Sort by Sentiment Score for easy comparison
     summary_df = summary_df.sort_values('Sentiment Score (0-100)', ascending=False)
     return score_fig, ratio_fig, summary_df
 def process_file(file_obj):
         all_processed_dfs = {}
         if file_path.endswith('.csv'):
             df = pd.read_csv(file_path)
             product_name = "Product"  # Default name for CSV
             processed_df, sentiment_counts = process_single_sheet(df, product_name)
             sentiment_results[product_name] = sentiment_counts
         elif file_path.endswith(('.xlsx', '.xls')):
             excel_file = pd.ExcelFile(file_path)
             for sheet_name in excel_file.sheet_names:
                 df = pd.read_excel(file_path, sheet_name=sheet_name)
                 processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
             raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
         # Create visualizations
+        distribution_plot, summary_table = create_comparison_charts(sentiment_results)
+        # Save results
         output_path = "sentiment_analysis_results.xlsx"
         with pd.ExcelWriter(output_path) as writer:
             for sheet_name, df in all_processed_dfs.items():
                 df.to_excel(writer, sheet_name=sheet_name, index=False)
+            summary_table.to_excel(writer, sheet_name='Summary', index=False)
         return (
+            distribution_plot,
             summary_table,
             output_path
         )
 # Create Gradio interface
+# Simplified chart builder (defined after process_file; it rebinds the earlier create_comparison_charts)
def create_comparison_charts(sentiment_results):
    """
    Create simplified, investment-focused comparison charts.

    NOTE(review): an earlier function with this same name exists in this
    file; this later definition rebinds the name, and its two-value return
    matches how process_file unpacks the result.

    Args:
        sentiment_results: Mapping of product name to that product's
            sentiment counts. Each value must provide ``to_dict()``
            returning ``{sentiment label: count}`` (presumably the pandas
            Series built by process_single_sheet -- confirm there).

    Returns:
        A ``(stack_fig, summary_df)`` tuple: a stacked bar chart of the
        percentage of each sentiment per product, and a summary DataFrame
        (total reviews plus positive / neutral / negative percentages)
        sorted by positive percentage in descending order.
    """
    sentiment_labels = ['Very Negative', 'Negative', 'Neutral', 'Positive', 'Very Positive']
    # Prepare data: one row of percentages per product
    plot_data = []
    for product, sentiment_counts in sentiment_results.items():
        sentiment_dict = sentiment_counts.to_dict()
        total = sum(sentiment_dict.values())
        row = {
            'Product': product,
            'Total Reviews': total
        }
        for sentiment, count in sentiment_dict.items():
            # A product with no counted reviews gets 0% instead of raising
            row[sentiment] = (count / total) * 100 if total else 0.0
        plot_data.append(row)
    # reindex guarantees every expected column exists, even for empty input.
    # fillna covers a sentiment that some products have and others lack:
    # that cell would otherwise be NaN in both the chart and the summary.
    df = pd.DataFrame(plot_data).reindex(
        columns=['Product', 'Total Reviews'] + sentiment_labels
    )
    df[sentiment_labels] = df[sentiment_labels].astype(float).fillna(0)
    # 1. Simple Stacked Bar Chart showing sentiment distribution
    #    (traces ordered from most positive to most negative, colours paired)
    stack_fig = go.Figure()
    sentiments = ['Very Positive', 'Positive', 'Neutral', 'Negative', 'Very Negative']
    colors = ['rgb(39, 174, 96)', 'rgb(46, 204, 113)',
              'rgb(241, 196, 15)', 'rgb(231, 76, 60)',
              'rgb(192, 57, 43)']
    for sentiment, color in zip(sentiments, colors):
        stack_fig.add_trace(go.Bar(
            name=sentiment,
            x=df['Product'],
            y=df[sentiment],
            marker_color=color
        ))
    stack_fig.update_layout(
        barmode='stack',
        title='Sentiment Distribution by Product',
        yaxis_title='Percentage (%)'
    )
    # 2. Aggregated Sentiment Ratios for Quick Comparison
    df['Positive_Total'] = df[['Positive', 'Very Positive']].sum(axis=1)
    df['Negative_Total'] = df[['Negative', 'Very Negative']].sum(axis=1)
    summary_df = pd.DataFrame({
        'Product': df['Product'],
        'Total Reviews': df['Total Reviews'],
        'Positive (%)': df['Positive_Total'].round(2),
        'Neutral (%)': df['Neutral'].round(2),
        'Negative (%)': df['Negative_Total'].round(2)
    })
    # Sort by Positive percentage for easy comparison
    summary_df = summary_df.sort_values('Positive (%)', ascending=False)
    return stack_fig, summary_df
+# Update the Gradio interface
 with gr.Blocks() as interface:
+    gr.Markdown("# Product Review Sentiment Analysis")
     gr.Markdown("""
+    ### Quick Guide
+    1. **Excel File (Multiple Products)**:
+       - Create separate sheets for each product
+       - Name sheets with product/company names
+       - Include "Reviews" column in each sheet
+    2. **CSV File (Single Product)**:
+       - Include "Reviews" column
+    Upload your file and click Analyze to get started.
     """)
     with gr.Row():
         analyze_btn = gr.Button("Analyze Sentiments")
     with gr.Row():
+        distribution_plot = gr.Plot(label="Sentiment Distribution")
     with gr.Row():
+        summary_table = gr.Dataframe(label="Summary Metrics")
     with gr.Row():
+        output_file = gr.File(label="Download Full Report")
     analyze_btn.click(
         fn=process_file,
         inputs=[file_input],
+        outputs=[distribution_plot, summary_table, output_file]
     )
+# launch interface
+interface.launch()