# Source: tiya1012's Hugging Face Space — uploaded app.py (commit c457329, verified)
import os
import subprocess
import sys

# Bootstrap: install any missing third-party dependency at startup.
# Maps the PyPI distribution name to its importable module name — they differ
# for scikit-learn, and __import__("scikit-learn") can never succeed (a hyphen
# is not a valid module name), which made the original re-install on every run.
_REQUIRED_PACKAGES = {
    "scikit-learn": "sklearn",
    "gradio": "gradio",
    "matplotlib": "matplotlib",
    "wordcloud": "wordcloud",
    "pandas": "pandas",
}
for _dist_name, _module_name in _REQUIRED_PACKAGES.items():
    try:
        __import__(_module_name)
    except ImportError:
        # sys.executable targets the interpreter actually running this script;
        # a bare "python3" could resolve to a different environment's pip.
        subprocess.check_call([sys.executable, "-m", "pip", "install", _dist_name])
import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from matplotlib.font_manager import FontProperties
import os
# Function to generate word cloud for cleaned_text
def plot_wordcloud(text_data, stopwords, width=500, height=500, background_color="White", collocations=True, min_font_size=5):
    """Render a word cloud for *text_data* onto a new matplotlib figure.

    The figure is created and drawn but NOT shown: this app runs headless
    under Gradio, where plt.show() is at best a no-op warning and under an
    interactive backend would block and could close the figure before the
    caller saves it with plt.savefig(). The figure is left as the current
    pyplot figure so the caller can save and close it.

    Args:
        text_data: Whitespace-joined corpus to visualize.
        stopwords: Set of words for WordCloud to exclude.
        width, height: Pixel dimensions of the generated cloud image.
        background_color: Canvas color passed to WordCloud.
        collocations: Whether WordCloud includes two-word phrases.
        min_font_size: Smallest font size WordCloud will draw.

    Returns:
        The generated WordCloud object (previously None; callers that
        ignore the return value are unaffected).
    """
    wordcloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        stopwords=stopwords,
        collocations=collocations,
        min_font_size=min_font_size
    ).generate(text_data)
    plt.figure(figsize=(10, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("Word Cloud")
    return wordcloud
# Function to process uploaded file and predict sentiment
def analyze_sentiment(file):
    """Train a sentiment classifier on an uploaded CSV and report results.

    Expects a CSV with 'cleaned_text' and 'sentiment_label' columns, fits a
    TF-IDF + RandomForest pipeline on the full dataset, then predicts over
    that same data (NOTE: in-sample predictions — no train/test split, so
    the reported distribution reflects a largely memorized model).

    Args:
        file: Gradio file object; only its .name (path) is used.

    Returns:
        A 3-tuple (summary_text, histogram_path, wordcloud_path). On error
        the paths are None — returning three values always, because the
        Gradio click handler is wired to three output components (the
        original single-string error return made Gradio itself raise).
    """
    # Human-readable names for the expected label encoding.
    label_names = {0: "Negative (0)", 1: "Positive (1)", 2: "Neutral (2)"}
    try:
        # Load CSV
        df = pd.read_csv(file.name)
        # Ensure the required columns exist
        if 'cleaned_text' not in df.columns or 'sentiment_label' not in df.columns:
            return ("Error: The uploaded CSV must contain 'cleaned_text' and 'sentiment_label' columns.",
                    None, None)
        # Extract text and labels
        X = df['cleaned_text']
        y = df['sentiment_label']
        # Vectorize text using TF-IDF
        vectorizer = TfidfVectorizer()
        X_vectorized = vectorizer.fit_transform(X)
        # Train Random Forest Classifier on the entire dataset
        model = RandomForestClassifier(random_state=42)
        model.fit(X_vectorized, y)
        # Predict sentiment for the entire dataset (in-sample)
        df['predicted_sentiment'] = model.predict(X_vectorized)
        # Generate sentiment distribution histogram (percentages)
        plt.figure(figsize=(8, 6))
        sentiment_counts = df['predicted_sentiment'].value_counts(normalize=True) * 100
        present = sentiment_counts.sort_index()
        present.plot(kind='bar', color=['blue', 'orange', 'green'], alpha=0.7)
        plt.title("Predicted Sentiment Distribution")
        plt.xlabel("Sentiment Labels")
        plt.ylabel("Percentage")
        # Label only the classes actually present: hard-coding three ticks
        # raises/mislabels when the model predicts fewer distinct classes.
        plt.xticks(ticks=range(len(present)),
                   labels=[label_names.get(lbl, str(lbl)) for lbl in present.index],
                   rotation=45)
        plt.grid(axis="y", linestyle="--", alpha=0.7)
        # Save the histogram as an image
        histogram_path = "sentiment_histogram.png"
        plt.tight_layout()
        plt.savefig(histogram_path)
        plt.close()
        # Generate a word cloud for cleaned_text
        text_data = " ".join(X.astype(str))
        stopwords = set()
        # Plot and save the word cloud (plot_wordcloud leaves the figure current)
        plot_wordcloud(text_data, stopwords)
        wordcloud_path = "wordcloud.png"
        plt.savefig(wordcloud_path)
        plt.close()
        # Build percentage summary; .get covers labels absent from predictions
        positive_percentage = sentiment_counts.get(1, 0)
        negative_percentage = sentiment_counts.get(0, 0)
        neutral_percentage = sentiment_counts.get(2, 0)
        summary = (f"Sentiment Summary:\n"
                   f"Positive: {positive_percentage:.2f}%\n"
                   f"Negative: {negative_percentage:.2f}%\n"
                   f"Neutral: {neutral_percentage:.2f}%")
        # Display results
        return (
            summary,
            histogram_path,
            wordcloud_path
        )
    except Exception as e:
        # Pad with None so all three Gradio outputs receive a value
        return (f"Error processing the file: {str(e)}", None, None)
# Gradio UI: file upload in, text summary plus two rendered images out.
with gr.Blocks() as demo:
    gr.Markdown("# Sentiment Analysis Chatbot")
    gr.Markdown("Please upload a CSV file with 'cleaned_text' and 'sentiment_label' columns.")

    # Input component
    csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])

    # Output components, in the order analyze_sentiment returns them
    summary_box = gr.Textbox(label="Message", lines=5)
    histogram_view = gr.Image(label="Sentiment Histogram")
    wordcloud_view = gr.Image(label="Word Cloud")

    # Wire the button to the analysis callback
    run_button = gr.Button("Analyze Sentiment")
    run_button.click(
        analyze_sentiment,
        inputs=csv_upload,
        outputs=[summary_box, histogram_view, wordcloud_view],
    )

# Start the app server
demo.launch()