# Spaces: Sleeping
import importlib
import os
import subprocess
import sys
# Ensure required packages are installed.
# pip distribution names do not always match the importable module name
# (e.g. "scikit-learn" is imported as "sklearn"), so the exceptions are
# mapped explicitly; otherwise the import check fails on every start-up and
# pip is invoked needlessly.
required_packages = ["scikit-learn", "gradio", "matplotlib", "wordcloud", "pandas"]
_import_names = {"scikit-learn": "sklearn"}
for package in required_packages:
    try:
        importlib.import_module(_import_names.get(package, package))
    except ImportError:
        # Install with the interpreter that is running this script so the
        # package lands in the active environment rather than in whichever
        # "python3" happens to be first on PATH.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
| import gradio as gr | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.ensemble import RandomForestClassifier | |
| import matplotlib.pyplot as plt | |
| from wordcloud import WordCloud | |
| from matplotlib.font_manager import FontProperties | |
| import os | |
# Render a word cloud for the cleaned text and display it with matplotlib.
def plot_wordcloud(text_data, stopwords, width=500, height=500, background_color="White", collocations=True, min_font_size=5):
    """Build a word cloud from ``text_data`` and show it in a new figure.

    Args:
        text_data: The text the cloud is generated from.
        stopwords: Words passed to ``WordCloud`` as its ``stopwords`` option.
        width: Width passed to ``WordCloud``.
        height: Height passed to ``WordCloud``.
        background_color: Background colour passed to ``WordCloud``.
        collocations: Passed to ``WordCloud`` as its ``collocations`` option.
        min_font_size: Passed to ``WordCloud`` as its ``min_font_size`` option.
    """
    cloud_options = {
        "width": width,
        "height": height,
        "background_color": background_color,
        "stopwords": stopwords,
        "collocations": collocations,
        "min_font_size": min_font_size,
    }
    cloud = WordCloud(**cloud_options)
    rendered = cloud.generate(text_data)

    # Draw on a fresh square figure without axes, then hand it to the backend.
    plt.figure(figsize=(10, 10))
    plt.imshow(rendered, interpolation="bilinear")
    plt.axis("off")
    plt.title("Word Cloud")
    plt.show()
# Display names for the numeric sentiment labels shown on the histogram axis.
_SENTIMENT_NAMES = {0: "Negative (0)", 1: "Positive (1)", 2: "Neutral (2)"}


def _save_sentiment_histogram(sentiment_counts, output_path):
    """Save a bar chart of the sentiment percentages to ``output_path``."""
    ordered = sentiment_counts.sort_index()
    # Derive the tick labels from the labels that are actually present, so a
    # missing class cannot shift the names onto the wrong bars.
    tick_labels = [_SENTIMENT_NAMES.get(label, str(label)) for label in ordered.index]

    # An explicit figure avoids depending on pyplot's global "current figure".
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ordered.plot(kind="bar", ax=ax, color=["blue", "orange", "green"], alpha=0.7)
        ax.set_title("Predicted Sentiment Distribution")
        ax.set_xlabel("Sentiment Labels")
        ax.set_ylabel("Percentage")
        ax.set_xticks(list(range(len(ordered))))
        ax.set_xticklabels(tick_labels, rotation=45)
        ax.grid(axis="y", linestyle="--", alpha=0.7)
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        plt.close(fig)


def _save_wordcloud(text_data, output_path):
    """Render a word cloud for ``text_data`` and save it to ``output_path``."""
    # The figure is saved directly instead of being shown first: showing a
    # figure before saving it can block, or leave nothing to save, when an
    # interactive matplotlib backend is active.
    cloud = WordCloud(
        width=500,
        height=500,
        background_color="White",
        stopwords=set(),
        collocations=True,
        min_font_size=5,
    ).generate(text_data)

    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        ax.imshow(cloud, interpolation="bilinear")
        ax.axis("off")
        ax.set_title("Word Cloud")
        fig.savefig(output_path)
    finally:
        plt.close(fig)


# Function to process uploaded file and predict sentiment
def analyze_sentiment(file):
    """Train on an uploaded CSV and summarise the predicted sentiment.

    Args:
        file: The uploaded file, given either as a filesystem path or as an
            object whose ``name`` attribute holds that path. ``None`` means
            nothing was uploaded.

    Returns:
        A 3-tuple ``(message, histogram_path, wordcloud_path)``. On success
        ``message`` is the percentage summary and the two paths point to the
        saved PNG images. On any failure ``message`` starts with ``"Error"``
        and both paths are ``None``, so the caller always receives one value
        per output component.
    """
    if file is None:
        return ("Error: Please upload a CSV file before running the analysis.", None, None)

    try:
        # Load CSV (accept both a plain path and an object exposing ``.name``)
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)

        # Ensure the required columns exist
        if 'cleaned_text' not in df.columns or 'sentiment_label' not in df.columns:
            return (
                "Error: The uploaded CSV must contain 'cleaned_text' and 'sentiment_label' columns.",
                None,
                None,
            )

        # Rows with a missing text or label cannot be vectorised or trained on.
        df = df.dropna(subset=['cleaned_text', 'sentiment_label'])
        if df.empty:
            return (
                "Error: The uploaded CSV has no rows with both 'cleaned_text' and 'sentiment_label' values.",
                None,
                None,
            )

        # Extract text and labels
        X = df['cleaned_text'].astype(str)
        y = df['sentiment_label']

        # Vectorize text using TF-IDF
        vectorizer = TfidfVectorizer()
        X_vectorized = vectorizer.fit_transform(X)

        # Train Random Forest Classifier on the entire dataset.
        # NOTE: the model is fitted and then applied to the same rows, so the
        # predictions are not an out-of-sample evaluation.
        model = RandomForestClassifier(random_state=42)
        model.fit(X_vectorized, y)
        predictions = pd.Series(model.predict(X_vectorized))

        # Percentage share of each predicted label
        sentiment_counts = predictions.value_counts(normalize=True) * 100

        histogram_path = "sentiment_histogram.png"
        _save_sentiment_histogram(sentiment_counts, histogram_path)

        wordcloud_path = "wordcloud.png"
        _save_wordcloud(" ".join(X), wordcloud_path)

        # Look the shares up by label through a plain dict so an integer key
        # is never interpreted as a position.
        shares = sentiment_counts.to_dict()
        positive_percentage = shares.get(1, 0)
        negative_percentage = shares.get(0, 0)
        neutral_percentage = shares.get(2, 0)
        summary = (f"Sentiment Summary:\n"
                   f"Positive: {positive_percentage:.2f}%\n"
                   f"Negative: {negative_percentage:.2f}%\n"
                   f"Neutral: {neutral_percentage:.2f}%")

        return (summary, histogram_path, wordcloud_path)
    except Exception as e:
        # UI boundary: report the failure in the message box instead of
        # raising, while still returning one value per output component.
        return (f"Error processing the file: {str(e)}", None, None)
# Gradio Interface: a CSV upload, three result panes, and a button that runs
# the analysis.
with gr.Blocks() as demo:
    gr.Markdown("# Sentiment Analysis Chatbot")
    gr.Markdown("Please upload a CSV file with 'cleaned_text' and 'sentiment_label' columns.")

    # Input
    file_input = gr.File(label="Upload CSV File", file_types=[".csv"])

    # Outputs, in the order analyze_sentiment returns them
    output_text = gr.Textbox(label="Message", lines=5)
    output_histogram = gr.Image(label="Sentiment Histogram")
    output_wordcloud = gr.Image(label="Word Cloud")

    analyze_button = gr.Button("Analyze Sentiment")
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=file_input,
        outputs=[output_text, output_histogram, output_wordcloud],
    )

# This script is meant to be saved as app.py
demo.launch()