Spaces:

shresht8
/

sentiment-analysis-excel

Sleeping

App Files Files Community

shresht8 committed on Jan 11, 2025

Commit

71e3164

verified ·

1 Parent(s): 0294743

Upload 2 files

Browse files

Files changed (2) hide show

app.py +125 -20
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -2,6 +2,9 @@ import gradio as gr
 import pandas as pd
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 # Load model and tokenizer globally for efficiency
 model_name = "tabularisai/multilingual-sentiment-analysis"
@@ -27,36 +30,128 @@ def predict_sentiment(texts):
     return [sentiment_map[p] for p in torch.argmax(probabilities, dim=-1).tolist()]
 def process_file(file_obj):
     """
     Process the input file and add sentiment analysis results
     """
     try:
-        # Read the file based on its extension
         file_path = file_obj.name
         if file_path.endswith('.csv'):
             df = pd.read_csv(file_path)
         elif file_path.endswith(('.xlsx', '.xls')):
-            df = pd.read_excel(file_path)
         else:
             raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
-        # Verify that 'Reviews' column exists
-        if 'Reviews' not in df.columns:
-            raise ValueError("Input file must contain a 'Reviews' column.")
-        # Perform sentiment analysis
-        reviews = df['Reviews'].fillna("")  # Handle any missing values
-        sentiments = predict_sentiment(reviews.tolist())
-        # Add results to the dataframe
-        df['Sentiment'] = sentiments
-        # Save the results to a new Excel file
-        output_path = "output_with_sentiment.xlsx"
-        df.to_excel(output_path, index=False)
-        return df, output_path
     except Exception as e:
         raise gr.Error(str(e))
@@ -64,8 +159,12 @@ def process_file(file_obj):
 # Create Gradio interface
 with gr.Blocks() as interface:
-    gr.Markdown("# Review Sentiment Analysis")
-    gr.Markdown("Upload an Excel or CSV file with a 'Reviews' column to analyze sentiment.")
     with gr.Row():
         file_input = gr.File(
@@ -77,13 +176,19 @@ with gr.Blocks() as interface:
         analyze_btn = gr.Button("Analyze Sentiments")
     with gr.Row():
-        output_df = gr.Dataframe(label="Results Preview")
-        output_file = gr.File(label="Download Results")
     analyze_btn.click(
         fn=process_file,
         inputs=[file_input],
-        outputs=[output_df, output_file]
     )
 # Launch the interface

 import pandas as pd
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import plotly.express as px
+import plotly.graph_objects as go
+from collections import defaultdict
 # Load model and tokenizer globally for efficiency
 model_name = "tabularisai/multilingual-sentiment-analysis"
     return [sentiment_map[p] for p in torch.argmax(probabilities, dim=-1).tolist()]
def process_single_sheet(df, product_name):
    """
    Run sentiment analysis on the reviews of a single sheet/file.

    Args:
        df: DataFrame that must contain a 'Reviews' column.
        product_name: Label identifying the sheet/file; used in the error
            message when the 'Reviews' column is missing.

    Returns:
        A tuple ``(result_df, sentiment_counts)``. ``result_df`` is a copy of
        ``df`` with an added 'Sentiment' column; ``sentiment_counts`` is a
        Series mapping each predicted label to its number of occurrences.

    Raises:
        ValueError: If ``df`` has no 'Reviews' column.
    """
    if 'Reviews' not in df.columns:
        raise ValueError(f"'Reviews' column not found in sheet/file for {product_name}")

    # Missing cells become empty strings; every remaining cell is cast to str
    # so numeric or date cells are passed on as text rather than raw values.
    reviews = df['Reviews'].fillna("").astype(str).tolist()

    # An empty sheet has nothing to classify, so skip the model call entirely.
    sentiments = predict_sentiment(reviews) if reviews else []

    # Work on a copy so the caller's DataFrame is left untouched.
    result_df = df.copy()
    result_df['Sentiment'] = sentiments

    # Calculate sentiment distribution (explicit dtype keeps the empty case
    # well-defined).
    sentiment_counts = pd.Series(sentiments, dtype=object).value_counts()
    return result_df, sentiment_counts
def create_comparison_charts(sentiment_results):
    """
    Create comparison charts and a summary table for different products.

    Args:
        sentiment_results: Mapping of product name to a Series (or any
            mapping) of sentiment label -> review count.

    Returns:
        A tuple ``(bar_fig, pie_fig, summary_df)``:
        - ``bar_fig``: bar chart of review counts per product, coloured by
          sentiment.
        - ``pie_fig``: pie chart of the sentiment distribution over all
          products combined.
        - ``summary_df``: DataFrame indexed by product with one column per
          sentiment label, plus 'Total Reviews' and 'Positive Ratio'
          (percentage of 'Positive' + 'Very Positive' reviews).
    """
    # Prepare data for plotting: one row per (product, sentiment) pair.
    rows = [
        (product, sentiment, count)
        for product, sentiment_counts in sentiment_results.items()
        for sentiment, count in sentiment_counts.items()
    ]
    plot_df = pd.DataFrame(rows, columns=['Product', 'Sentiment', 'Count'])

    # Pin each sentiment label to one colour so the bar chart and the pie
    # chart use the same colour for the same label.
    palette = px.colors.qualitative.Set3
    labels = sorted(plot_df['Sentiment'].unique())
    color_map = {
        label: palette[position % len(palette)]
        for position, label in enumerate(labels)
    }

    # Create stacked bar chart
    bar_fig = px.bar(plot_df,
                     x='Product',
                     y='Count',
                     color='Sentiment',
                     title='Sentiment Distribution by Product',
                     labels={'Count': 'Number of Reviews'},
                     color_discrete_map=color_map)

    # Create pie chart for overall sentiment distribution
    pie_fig = px.pie(plot_df,
                     values='Count',
                     names='Sentiment',
                     color='Sentiment',
                     title='Overall Sentiment Distribution',
                     color_discrete_map=color_map)

    # Create summary table. Each (product, sentiment) pair occurs once, so
    # summing reproduces the counts while keeping them as whole numbers
    # (the default aggregation is the mean, which yields floats).
    summary_df = plot_df.pivot_table(
        values='Count',
        index='Product',
        columns='Sentiment',
        aggfunc='sum',
        fill_value=0
    )

    # Add total reviews column
    summary_df['Total Reviews'] = summary_df.sum(axis=1)

    # Calculate percentage of positive reviews (Positive + Very Positive),
    # using only the positive labels that actually occur in the data.
    positive_cols = [col for col in ('Positive', 'Very Positive') if col in summary_df.columns]
    summary_df['Positive Ratio'] = (summary_df[positive_cols].sum(axis=1) / summary_df['Total Reviews'] * 100).round(2)

    return bar_fig, pie_fig, summary_df
 def process_file(file_obj):
     """
     Analyse the reviews in an uploaded CSV/Excel file.

     A CSV file is treated as a single product named "Product"; an Excel
     workbook is treated as one product per sheet, named after the sheet.

     Args:
         file_obj: The uploaded file as delivered by the Gradio file input,
             either an object exposing the path as ``.name`` or a plain
             path string.

     Returns:
         A tuple ``(bar_chart, pie_chart, summary_table, output_path)``:
         the two plotly figures, the summary DataFrame with 'Product' as a
         regular column, and the path of the generated Excel workbook
         (one sheet per product plus a 'Summary' sheet).

     Raises:
         gr.Error: For any failure (no file, unsupported format, missing
             'Reviews' column, read/write errors); the original exception
             is chained as the cause.
     """
     import os
     import tempfile

     try:
         if file_obj is None:
             raise ValueError("Please upload a CSV or Excel file.")
         # Accept both file-like objects (path in .name) and plain paths.
         file_path = getattr(file_obj, "name", file_obj)

         sentiment_results = {}
         all_processed_dfs = {}
         for sheet_name, df in _load_review_sheets(file_path).items():
             processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
             all_processed_dfs[sheet_name] = processed_df
             sentiment_results[sheet_name] = sentiment_counts

         # Create visualizations
         bar_chart, pie_chart, summary_table = create_comparison_charts(sentiment_results)

         # Save results to a new Excel file. Each call writes into its own
         # temporary directory so simultaneous requests cannot overwrite
         # each other's results; the file name itself stays the same.
         output_dir = tempfile.mkdtemp(prefix="sentiment_")
         output_path = os.path.join(output_dir, "sentiment_analysis_results.xlsx")
         with pd.ExcelWriter(output_path) as writer:
             # Save processed data
             for sheet_name, df in all_processed_dfs.items():
                 df.to_excel(writer, sheet_name=sheet_name, index=False)
             # Save summary
             summary_table.to_excel(writer, sheet_name='Summary', index=True)

         return (
             bar_chart,
             pie_chart,
             # Expose the 'Product' index as a regular column so the product
             # names are part of the displayed columns.
             summary_table.reset_index(),
             output_path
         )
     except Exception as e:
         raise gr.Error(str(e)) from e


 def _load_review_sheets(file_path):
     """Return a ``{product_name: DataFrame}`` mapping read from a CSV or Excel file."""
     # Compare case-insensitively so ".CSV" / ".XLSX" uploads are accepted.
     lowered = str(file_path).lower()
     if lowered.endswith('.csv'):
         # A CSV holds a single product; use the default name.
         return {"Product": pd.read_csv(file_path)}
     if lowered.endswith(('.xlsx', '.xls')):
         # sheet_name=None reads every sheet in one pass and lets pandas
         # open and close the workbook itself.
         return pd.read_excel(file_path, sheet_name=None)
     raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
 # Create Gradio interface
 with gr.Blocks() as interface:
+    gr.Markdown("# Multi-Product Review Sentiment Analysis")
+    gr.Markdown("""
+    Upload a file to analyze sentiments:
+    - For CSV: Single product reviews with 'Reviews' column
+    - For Excel: Multiple sheets, each named after the product, with 'Reviews' column
+    """)
     with gr.Row():
         file_input = gr.File(
         analyze_btn = gr.Button("Analyze Sentiments")
     with gr.Row():
+        bar_plot = gr.Plot(label="Sentiment Distribution by Product")
+        pie_plot = gr.Plot(label="Overall Sentiment Distribution")
+    with gr.Row():
+        summary_table = gr.Dataframe(label="Summary Statistics")
+    with gr.Row():
+        output_file = gr.File(label="Download Detailed Results")
     analyze_btn.click(
         fn=process_file,
         inputs=[file_input],
+        outputs=[bar_plot, pie_plot, summary_table, output_file]
     )
 # Launch the interface

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ transformers
 openpyxl
 torch
 pandas
-gradio

 openpyxl
 torch
 pandas
+gradio
+plotly