# NOTE(review): removed a copy/paste artifact that preceded the code — a
# "File size" line, two commit hashes, and a column of bare line numbers.
# That residue was not valid Python and prevented the script from running.
import os
import subprocess
import sys

# Ensure required packages are installed before the heavyweight imports below.
# Pip distribution names do not always match importable module names:
# __import__("scikit-learn") ALWAYS raises ImportError (the module is
# "sklearn"), which previously forced a pip install on every single run.
# Map pip name -> import name so the check actually works.
_REQUIRED_PACKAGES = {
    "scikit-learn": "sklearn",
    "gradio": "gradio",
    "matplotlib": "matplotlib",
    "wordcloud": "wordcloud",
    "pandas": "pandas",
}
for _dist_name, _module_name in _REQUIRED_PACKAGES.items():
    try:
        __import__(_module_name)
    except ImportError:
        # Use the running interpreter rather than a hard-coded "python3",
        # so the install lands in the same environment this script uses.
        subprocess.check_call([sys.executable, "-m", "pip", "install", _dist_name])

import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from matplotlib.font_manager import FontProperties
import os

# Function to generate word cloud for cleaned_text
def plot_wordcloud(text_data, stopwords, width=500, height=500, background_color="White", collocations=True, min_font_size=5):
    """Render a word cloud for cleaned text onto a new matplotlib figure.

    Parameters
    ----------
    text_data : str
        The full corpus as a single whitespace-joined string.
    stopwords : set
        Words to exclude from the cloud (passed through to WordCloud).
    width, height, background_color, collocations, min_font_size :
        Forwarded to WordCloud unchanged.

    The figure is left as the current pyplot figure so the caller can
    persist it with ``plt.savefig(...)``.  The previous implementation
    called ``plt.show()`` here, which in a non-interactive script either
    blocks or flushes the figure — the caller's subsequent ``savefig``
    could then write a blank image.
    """
    wordcloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        stopwords=stopwords,
        collocations=collocations,
        min_font_size=min_font_size
    ).generate(text_data)

    plt.figure(figsize=(10, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("Word Cloud")
    # Intentionally no plt.show(): keep the figure current for the caller.

# Function to process uploaded file and predict sentiment
def analyze_sentiment(file):
    """Train a quick in-sample sentiment model on an uploaded CSV and report results.

    Parameters
    ----------
    file : gradio file object
        Upload handle whose ``.name`` is a local CSV path.  The CSV must
        contain 'cleaned_text' and 'sentiment_label' columns.

    Returns
    -------
    tuple
        ``(summary_text, histogram_path, wordcloud_path)``.  On error the
        two paths are ``None`` — the UI wires THREE output components, so a
        bare string (as previously returned) would be mis-distributed
        across them instead of showing a clean error message.

    NOTE: the model is trained and evaluated on the same data, so the
    reported distribution reflects in-sample predictions, not held-out
    accuracy.
    """
    try:
        # Load CSV
        df = pd.read_csv(file.name)

        # Ensure the required columns exist before touching them.
        if 'cleaned_text' not in df.columns or 'sentiment_label' not in df.columns:
            return ("Error: The uploaded CSV must contain 'cleaned_text' and 'sentiment_label' columns.",
                    None, None)

        # Extract text and labels; astype(str) guards against NaN cells,
        # which TfidfVectorizer would otherwise choke on.
        X = df['cleaned_text'].astype(str)
        y = df['sentiment_label']

        # Vectorize text using TF-IDF
        vectorizer = TfidfVectorizer()
        X_vectorized = vectorizer.fit_transform(X)

        # Train Random Forest Classifier on the entire dataset
        model = RandomForestClassifier(random_state=42)
        model.fit(X_vectorized, y)

        # Predict sentiment for the entire dataset (in-sample, see docstring)
        df['predicted_sentiment'] = model.predict(X_vectorized)

        # Generate sentiment distribution histogram (percentages per label)
        plt.figure(figsize=(8, 6))
        sentiment_counts = df['predicted_sentiment'].value_counts(normalize=True) * 100
        sentiment_counts.sort_index().plot(kind='bar', color=['blue', 'orange', 'green'], alpha=0.7)
        plt.title("Predicted Sentiment Distribution")
        plt.xlabel("Sentiment Labels")
        plt.ylabel("Percentage")
        # NOTE(review): tick labels assume labels are exactly {0, 1, 2} with
        # 0=Negative, 1=Positive, 2=Neutral — confirm against the data source.
        plt.xticks(ticks=[0, 1, 2], labels=["Negative (0)", "Positive (1)", "Neutral (2)"], rotation=45)
        plt.grid(axis="y", linestyle="--", alpha=0.7)

        # Save the histogram as an image
        histogram_path = "sentiment_histogram.png"
        plt.tight_layout()
        plt.savefig(histogram_path)
        plt.close()

        # Generate a word cloud for the whole corpus
        text_data = " ".join(X)
        stopwords = set()

        # plot_wordcloud leaves its figure current, so savefig below persists it.
        plot_wordcloud(text_data, stopwords)

        wordcloud_path = "wordcloud.png"
        plt.savefig(wordcloud_path)
        plt.close()

        # Build the textual summary; .get() tolerates a label being absent.
        positive_percentage = sentiment_counts.get(1, 0)
        negative_percentage = sentiment_counts.get(0, 0)
        neutral_percentage = sentiment_counts.get(2, 0)

        summary = (f"Sentiment Summary:\n"
                   f"Positive: {positive_percentage:.2f}%\n"
                   f"Negative: {negative_percentage:.2f}%\n"
                   f"Neutral: {neutral_percentage:.2f}%")

        return (
            summary,
            histogram_path,
            wordcloud_path
        )

    except Exception as e:
        # Top-level UI boundary: surface the error text, keep image slots empty.
        return (f"Error processing the file: {str(e)}", None, None)

# Gradio Interface: one-column layout — file upload in, text summary plus
# two rendered images out.
with gr.Blocks() as app:
    gr.Markdown("# Sentiment Analysis Chatbot")
    gr.Markdown("Please upload a CSV file with 'cleaned_text' and 'sentiment_label' columns.")

    csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
    summary_box = gr.Textbox(label="Message", lines=5)
    histogram_view = gr.Image(label="Sentiment Histogram")
    wordcloud_view = gr.Image(label="Word Cloud")

    run_button = gr.Button("Analyze Sentiment")
    run_button.click(
        analyze_sentiment,
        inputs=csv_upload,
        outputs=[summary_box, histogram_view, wordcloud_view],
    )

# Save as app.py
app.launch()