# Source: tiya1012's Hugging Face Space — uploaded app.py (commit c457329, verified)
import os
import subprocess
import sys

# Bootstrap: install any missing third-party dependency at startup.
# Maps the PyPI distribution name to its importable module name — they differ
# for scikit-learn, and __import__("scikit-learn") can never succeed (a hyphen
# is not a valid module name), which made the original re-install on every run.
_REQUIRED_PACKAGES = {
    "scikit-learn": "sklearn",
    "gradio": "gradio",
    "matplotlib": "matplotlib",
    "wordcloud": "wordcloud",
    "pandas": "pandas",
}
for _dist_name, _module_name in _REQUIRED_PACKAGES.items():
    try:
        __import__(_module_name)
    except ImportError:
        # sys.executable targets the interpreter actually running this script;
        # a bare "python3" could resolve to a different environment's pip.
        subprocess.check_call([sys.executable, "-m", "pip", "install", _dist_name])
import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from matplotlib.font_manager import FontProperties
import os
# Function to generate word cloud for cleaned_text
def plot_wordcloud(text_data, stopwords, width=500, height=500, background_color="White", collocations=True, min_font_size=5):
    """Render a word cloud for *text_data* onto a new matplotlib figure.

    The figure is created and drawn but NOT shown: this app runs headless
    under Gradio, where plt.show() is at best a no-op warning and under an
    interactive backend would block and could close the figure before the
    caller saves it with plt.savefig(). The figure is left as the current
    pyplot figure so the caller can save and close it.

    Args:
        text_data: Whitespace-joined corpus to visualize.
        stopwords: Set of words for WordCloud to exclude.
        width, height: Pixel dimensions of the generated cloud image.
        background_color: Canvas color passed to WordCloud.
        collocations: Whether WordCloud includes two-word phrases.
        min_font_size: Smallest font size WordCloud will draw.

    Returns:
        The generated WordCloud object (previously None; callers that
        ignore the return value are unaffected).
    """
    wordcloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        stopwords=stopwords,
        collocations=collocations,
        min_font_size=min_font_size
    ).generate(text_data)
    plt.figure(figsize=(10, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("Word Cloud")
    return wordcloud
# Function to process uploaded file and predict sentiment
def analyze_sentiment(file):
    """Train a sentiment classifier on an uploaded CSV and report results.

    Expects a CSV with 'cleaned_text' and 'sentiment_label' columns, fits a
    TF-IDF + RandomForest pipeline on the full dataset, then predicts over
    that same data (NOTE: in-sample predictions — no train/test split, so
    the reported distribution reflects a largely memorized model).

    Args:
        file: Gradio file object; only its .name (path) is used.

    Returns:
        A 3-tuple (summary_text, histogram_path, wordcloud_path). On error
        the paths are None — returning three values always, because the
        Gradio click handler is wired to three output components (the
        original single-string error return made Gradio itself raise).
    """
    # Human-readable names for the expected label encoding.
    label_names = {0: "Negative (0)", 1: "Positive (1)", 2: "Neutral (2)"}
    try:
        # Load CSV
        df = pd.read_csv(file.name)
        # Ensure the required columns exist
        if 'cleaned_text' not in df.columns or 'sentiment_label' not in df.columns:
            return ("Error: The uploaded CSV must contain 'cleaned_text' and 'sentiment_label' columns.",
                    None, None)
        # Extract text and labels
        X = df['cleaned_text']
        y = df['sentiment_label']
        # Vectorize text using TF-IDF
        vectorizer = TfidfVectorizer()
        X_vectorized = vectorizer.fit_transform(X)
        # Train Random Forest Classifier on the entire dataset
        model = RandomForestClassifier(random_state=42)
        model.fit(X_vectorized, y)
        # Predict sentiment for the entire dataset (in-sample)
        df['predicted_sentiment'] = model.predict(X_vectorized)
        # Generate sentiment distribution histogram (percentages)
        plt.figure(figsize=(8, 6))
        sentiment_counts = df['predicted_sentiment'].value_counts(normalize=True) * 100
        present = sentiment_counts.sort_index()
        present.plot(kind='bar', color=['blue', 'orange', 'green'], alpha=0.7)
        plt.title("Predicted Sentiment Distribution")
        plt.xlabel("Sentiment Labels")
        plt.ylabel("Percentage")
        # Label only the classes actually present: hard-coding three ticks
        # raises/mislabels when the model predicts fewer distinct classes.
        plt.xticks(ticks=range(len(present)),
                   labels=[label_names.get(lbl, str(lbl)) for lbl in present.index],
                   rotation=45)
        plt.grid(axis="y", linestyle="--", alpha=0.7)
        # Save the histogram as an image
        histogram_path = "sentiment_histogram.png"
        plt.tight_layout()
        plt.savefig(histogram_path)
        plt.close()
        # Generate a word cloud for cleaned_text
        text_data = " ".join(X.astype(str))
        stopwords = set()
        # Plot and save the word cloud (plot_wordcloud leaves the figure current)
        plot_wordcloud(text_data, stopwords)
        wordcloud_path = "wordcloud.png"
        plt.savefig(wordcloud_path)
        plt.close()
        # Build percentage summary; .get covers labels absent from predictions
        positive_percentage = sentiment_counts.get(1, 0)
        negative_percentage = sentiment_counts.get(0, 0)
        neutral_percentage = sentiment_counts.get(2, 0)
        summary = (f"Sentiment Summary:\n"
                   f"Positive: {positive_percentage:.2f}%\n"
                   f"Negative: {negative_percentage:.2f}%\n"
                   f"Neutral: {neutral_percentage:.2f}%")
        # Display results
        return (
            summary,
            histogram_path,
            wordcloud_path
        )
    except Exception as e:
        # Pad with None so all three Gradio outputs receive a value
        return (f"Error processing the file: {str(e)}", None, None)
# Gradio UI: file upload in, text summary plus two rendered images out.
with gr.Blocks() as demo:
    gr.Markdown("# Sentiment Analysis Chatbot")
    gr.Markdown("Please upload a CSV file with 'cleaned_text' and 'sentiment_label' columns.")

    # Input component
    csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])

    # Output components, in the order analyze_sentiment returns them
    summary_box = gr.Textbox(label="Message", lines=5)
    histogram_view = gr.Image(label="Sentiment Histogram")
    wordcloud_view = gr.Image(label="Word Cloud")

    # Wire the button to the analysis callback
    run_button = gr.Button("Analyze Sentiment")
    run_button.click(
        analyze_sentiment,
        inputs=csv_upload,
        outputs=[summary_box, histogram_view, wordcloud_view],
    )

# Start the app server
demo.launch()