"""Gradio app: train a TF-IDF + RandomForest sentiment model on an uploaded
CSV and display a sentiment histogram, a word cloud, and a text summary.

The CSV must contain 'cleaned_text' and 'sentiment_label' columns, with
labels 0 = negative, 1 = positive, 2 = neutral.
"""

import importlib
import subprocess
import sys

# Map pip distribution names to their importable module names: the check
# `__import__("scikit-learn")` can never succeed (the module is "sklearn"),
# which silently forced a pip install on every start.
REQUIRED_PACKAGES = {
    "scikit-learn": "sklearn",
    "gradio": "gradio",
    "matplotlib": "matplotlib",
    "wordcloud": "wordcloud",
    "pandas": "pandas",
}

for pip_name, module_name in REQUIRED_PACKAGES.items():
    try:
        importlib.import_module(module_name)
    except ImportError:
        # Use the running interpreter rather than a hard-coded "python3" so
        # the package lands in the environment actually executing this app.
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.font_manager import FontProperties
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from wordcloud import WordCloud


def plot_wordcloud(text_data, stopwords, width=500, height=500,
                   background_color="White", collocations=True,
                   min_font_size=5):
    """Render a word cloud for *text_data* onto a new matplotlib figure.

    Parameters
    ----------
    text_data : str
        The full corpus as a single string.
    stopwords : set
        Words to exclude from the cloud.
    width, height, background_color, collocations, min_font_size :
        Passed through to :class:`wordcloud.WordCloud`.

    The figure is deliberately left open (no ``plt.show()``): in
    non-interactive backends ``show()`` flushes the figure, and the caller
    still needs to ``plt.savefig()`` it afterwards.
    """
    cloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        stopwords=stopwords,
        collocations=collocations,
        min_font_size=min_font_size,
    ).generate(text_data)

    plt.figure(figsize=(10, 10))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("Word Cloud")


def analyze_sentiment(file):
    """Train on the uploaded CSV and produce summary text plus two plots.

    Parameters
    ----------
    file :
        Gradio file wrapper; ``file.name`` is the path of the uploaded CSV.

    Returns
    -------
    tuple
        ``(summary_text, histogram_path, wordcloud_path)``. On failure the
        summary holds the error message and both image slots are ``None`` —
        the click handler is wired to three outputs, so every path must
        return three values.
    """
    try:
        df = pd.read_csv(file.name)

        # Validate schema before touching the columns.
        if 'cleaned_text' not in df.columns or 'sentiment_label' not in df.columns:
            return (
                "Error: The uploaded CSV must contain 'cleaned_text' and 'sentiment_label' columns.",
                None,
                None,
            )

        X = df['cleaned_text']
        y = df['sentiment_label']

        # Vectorize text and fit on the whole dataset (no train/test split:
        # this app only visualizes in-sample predictions).
        vectorizer = TfidfVectorizer()
        X_vectorized = vectorizer.fit_transform(X)
        model = RandomForestClassifier(random_state=42)
        model.fit(X_vectorized, y)
        df['predicted_sentiment'] = model.predict(X_vectorized)

        # --- Sentiment distribution histogram (percentages) ---
        plt.figure(figsize=(8, 6))
        sentiment_counts = df['predicted_sentiment'].value_counts(normalize=True) * 100
        ordered = sentiment_counts.sort_index()
        ordered.plot(kind='bar', color=['blue', 'orange', 'green'], alpha=0.7)
        plt.title("Predicted Sentiment Distribution")
        plt.xlabel("Sentiment Labels")
        plt.ylabel("Percentage")
        # Label only the classes actually present: hard-coding ticks
        # [0, 1, 2] misaligns (or errors) when a class was never predicted.
        label_names = {0: "Negative (0)", 1: "Positive (1)", 2: "Neutral (2)"}
        plt.xticks(
            ticks=range(len(ordered)),
            labels=[label_names.get(lbl, str(lbl)) for lbl in ordered.index],
            rotation=45,
        )
        plt.grid(axis="y", linestyle="--", alpha=0.7)

        histogram_path = "sentiment_histogram.png"
        plt.tight_layout()
        plt.savefig(histogram_path)
        plt.close()

        # --- Word cloud over the cleaned text ---
        text_data = " ".join(X.astype(str))
        stopwords = set()
        plot_wordcloud(text_data, stopwords)
        wordcloud_path = "wordcloud.png"
        # Save while the figure created by plot_wordcloud is still current.
        plt.savefig(wordcloud_path)
        plt.close()

        # --- Text summary ---
        positive_percentage = sentiment_counts.get(1, 0)
        negative_percentage = sentiment_counts.get(0, 0)
        neutral_percentage = sentiment_counts.get(2, 0)
        summary = (f"Sentiment Summary:\n"
                   f"Positive: {positive_percentage:.2f}%\n"
                   f"Negative: {negative_percentage:.2f}%\n"
                   f"Neutral: {neutral_percentage:.2f}%")

        return summary, histogram_path, wordcloud_path

    except Exception as e:
        # Surface the failure in the text box; the image outputs still need
        # placeholders so Gradio receives all three expected values.
        return f"Error processing the file: {str(e)}", None, None


# --- Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Sentiment Analysis Chatbot")
    gr.Markdown("Please upload a CSV file with 'cleaned_text' and 'sentiment_label' columns.")

    file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
    output_text = gr.Textbox(label="Message", lines=5)
    output_histogram = gr.Image(label="Sentiment Histogram")
    output_wordcloud = gr.Image(label="Word Cloud")

    analyze_button = gr.Button("Analyze Sentiment")
    analyze_button.click(
        analyze_sentiment,
        inputs=file_input,
        outputs=[output_text, output_histogram, output_wordcloud],
    )

demo.launch()