File size: 2,994 Bytes
54f0723
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline

# Build the Hugging Face text-classification pipeline once at import time so
# every call to sentiment_analyzer() reuses the same loaded model.
analyzer = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")

def sentiment_analyzer(review):
    """Classify a single review and return its sentiment label.

    Args:
        review: The review text. Anything that is not a non-blank string
            (``None``, NaN, numbers, ``""``, whitespace only) is treated as
            invalid input.

    Returns:
        The ``label`` field of the first prediction produced by the
        module-level ``analyzer`` pipeline, or ``"UNKNOWN"`` for invalid input.
    """
    # A str is never NaN, so the isinstance check alone rejects missing values.
    # Blank text carries no sentiment and would only receive an arbitrary label.
    if not isinstance(review, str) or not review.strip():
        return "UNKNOWN"

    # truncation=True keeps reviews longer than the model's maximum sequence
    # length from raising inside the pipeline.
    sentiment = analyzer([review], truncation=True)
    return sentiment[0]['label']

def read_reviews_and_analyze_sentiment(file_path):
    """Read an uploaded Excel file and label every review with a sentiment.

    Args:
        file_path: Path of the uploaded ``.xlsx`` file (extension matched
            case-insensitively). ``None`` or an empty path is rejected.

    Returns:
        On success, the DataFrame read from the file with ``reviews`` converted
        to strings (missing cells become ``""``) and a new ``Sentiment``
        column; missing reviews are labelled ``"UNKNOWN"``.
        On failure, a string starting with ``"Error"`` describing the problem.
    """
    # A missing upload is reported the same way as a wrong file type instead
    # of leaking an AttributeError message to the user.
    if not file_path or not str(file_path).lower().endswith(".xlsx"):
        return "Error: Please upload a valid Excel (.xlsx) file."

    try:
        df = pd.read_excel(file_path)

        # Ensure 'reviews' column exists
        if 'reviews' not in df.columns:
            return "Error: The Excel file must contain a 'reviews' column."

        # Record missing cells BEFORE the string conversion: astype(str) would
        # otherwise turn NaN into the literal text "nan" and it would be
        # classified as if it were a real review.
        missing = df['reviews'].isna()
        df['reviews'] = df['reviews'].astype(str).mask(missing, "")

        # Only real reviews are sent to the model; missing ones stay UNKNOWN.
        df['Sentiment'] = "UNKNOWN"
        if not missing.all():
            present = ~missing
            df.loc[present, 'Sentiment'] = df.loc[present, 'reviews'].apply(sentiment_analyzer)

        return df
    except Exception as e:
        # Top-level boundary for the UI: report any read/parse/model failure
        # as a message rather than crashing the request.
        return f"Error processing file: {e}"

def generate_sentiment_pie_chart(df):
    """Build a pie chart of the sentiment distribution with per-label counts.

    Args:
        df: DataFrame containing a ``Sentiment`` column.

    Returns:
        The matplotlib figure holding the pie chart. Each wedge is labelled
        ``"<label> (<count>)"`` and annotated with its percentage.
    """
    # Count occurrences of each sentiment category (sorted by frequency)
    sentiment_counts = df['Sentiment'].value_counts()

    # Colors are looked up by label, not by position: value_counts() orders by
    # frequency, so a positional list would paint NEGATIVE green whenever it
    # is the majority. Labels outside the palette fall back to gray.
    palette = {"POSITIVE": "green", "NEGATIVE": "red", "UNKNOWN": "gray"}
    wedge_labels = []
    wedge_colors = []
    for label, count in sentiment_counts.items():
        wedge_labels.append(f"{label} ({count})")
        wedge_colors.append(palette.get(str(label).upper(), "gray"))

    # Create pie chart
    fig, ax = plt.subplots()
    ax.pie(
        sentiment_counts,
        labels=wedge_labels,
        autopct='%1.1f%%',
        colors=wedge_colors,
    )
    ax.set_title("Sentiment Distribution")

    return fig

def process_and_visualize_sentiment(file_path):
    """Process the uploaded file and return the labelled table plus a chart.

    Args:
        file_path: Path of the uploaded Excel file, as supplied by the
            Gradio file input.

    Returns:
        A ``(table, figure)`` tuple: the ``reviews`` and ``Sentiment`` columns
        of the analyzed DataFrame, and the sentiment pie chart.

    Raises:
        gr.Error: If the file could not be read or analyzed; the message is
            the error text produced by ``read_reviews_and_analyze_sentiment``.
    """
    result = read_reviews_and_analyze_sentiment(file_path)

    # The reader signals failure by returning a message string. Handing that
    # string to the Dataframe output does not display it as text, so surface
    # it through Gradio's error mechanism instead.
    if not isinstance(result, pd.DataFrame):
        raise gr.Error(str(result))

    chart = generate_sentiment_pie_chart(result)
    return result[['reviews', 'Sentiment']], chart

# Gradio Setup for Hugging Face Spaces
# One file upload goes in (passed to the callback as a filesystem path);
# the callback's two return values feed the table and the plot, in that order.
demo = gr.Interface(
    fn=process_and_visualize_sentiment,
    inputs=gr.File(label="Upload an Excel file with reviews", type="filepath"),
    outputs=[gr.Dataframe(label="Sentiments"), gr.Plot(label="Sentiment Distribution")],
    title="Hugging Face Sentiment Analyzer",
    description="This application analyzes sentiments based on uploaded Excel reviews and generates a sentiment distribution pie chart."
)

# Launch the Gradio app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()