"""Gradio app: train a TF-IDF + RandomForest sentiment model on an uploaded
CSV and display a sentiment histogram, a word cloud, and a text summary.

The CSV must contain 'cleaned_text' and 'sentiment_label' columns, with
labels 0 = negative, 1 = positive, 2 = neutral.
"""

import importlib
import subprocess
import sys

# Map pip distribution names to their importable module names: the check
# `__import__("scikit-learn")` can never succeed (the module is "sklearn"),
# which silently forced a pip install on every start.
REQUIRED_PACKAGES = {
    "scikit-learn": "sklearn",
    "gradio": "gradio",
    "matplotlib": "matplotlib",
    "wordcloud": "wordcloud",
    "pandas": "pandas",
}

for pip_name, module_name in REQUIRED_PACKAGES.items():
    try:
        importlib.import_module(module_name)
    except ImportError:
        # Use the running interpreter rather than a hard-coded "python3" so
        # the package lands in the environment actually executing this app.
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.font_manager import FontProperties
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from wordcloud import WordCloud


def plot_wordcloud(text_data, stopwords, width=500, height=500,
                   background_color="White", collocations=True,
                   min_font_size=5):
    """Render a word cloud for *text_data* onto a new matplotlib figure.

    Parameters
    ----------
    text_data : str
        The full corpus as a single string.
    stopwords : set
        Words to exclude from the cloud.
    width, height, background_color, collocations, min_font_size :
        Passed through to :class:`wordcloud.WordCloud`.

    The figure is deliberately left open (no ``plt.show()``): in
    non-interactive backends ``show()`` flushes the figure, and the caller
    still needs to ``plt.savefig()`` it afterwards.
    """
    cloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        stopwords=stopwords,
        collocations=collocations,
        min_font_size=min_font_size,
    ).generate(text_data)

    plt.figure(figsize=(10, 10))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("Word Cloud")


def analyze_sentiment(file):
    """Train on the uploaded CSV and produce summary text plus two plots.

    Parameters
    ----------
    file :
        Gradio file wrapper; ``file.name`` is the path of the uploaded CSV.

    Returns
    -------
    tuple
        ``(summary_text, histogram_path, wordcloud_path)``. On failure the
        summary holds the error message and both image slots are ``None`` —
        the click handler is wired to three outputs, so every path must
        return three values.
    """
    try:
        df = pd.read_csv(file.name)

        # Validate schema before touching the columns.
        if 'cleaned_text' not in df.columns or 'sentiment_label' not in df.columns:
            return (
                "Error: The uploaded CSV must contain 'cleaned_text' and 'sentiment_label' columns.",
                None,
                None,
            )

        X = df['cleaned_text']
        y = df['sentiment_label']

        # Vectorize text and fit on the whole dataset (no train/test split:
        # this app only visualizes in-sample predictions).
        vectorizer = TfidfVectorizer()
        X_vectorized = vectorizer.fit_transform(X)
        model = RandomForestClassifier(random_state=42)
        model.fit(X_vectorized, y)
        df['predicted_sentiment'] = model.predict(X_vectorized)

        # --- Sentiment distribution histogram (percentages) ---
        plt.figure(figsize=(8, 6))
        sentiment_counts = df['predicted_sentiment'].value_counts(normalize=True) * 100
        ordered = sentiment_counts.sort_index()
        ordered.plot(kind='bar', color=['blue', 'orange', 'green'], alpha=0.7)
        plt.title("Predicted Sentiment Distribution")
        plt.xlabel("Sentiment Labels")
        plt.ylabel("Percentage")
        # Label only the classes actually present: hard-coding ticks
        # [0, 1, 2] misaligns (or errors) when a class was never predicted.
        label_names = {0: "Negative (0)", 1: "Positive (1)", 2: "Neutral (2)"}
        plt.xticks(
            ticks=range(len(ordered)),
            labels=[label_names.get(lbl, str(lbl)) for lbl in ordered.index],
            rotation=45,
        )
        plt.grid(axis="y", linestyle="--", alpha=0.7)

        histogram_path = "sentiment_histogram.png"
        plt.tight_layout()
        plt.savefig(histogram_path)
        plt.close()

        # --- Word cloud over the cleaned text ---
        text_data = " ".join(X.astype(str))
        stopwords = set()
        plot_wordcloud(text_data, stopwords)
        wordcloud_path = "wordcloud.png"
        # Save while the figure created by plot_wordcloud is still current.
        plt.savefig(wordcloud_path)
        plt.close()

        # --- Text summary ---
        positive_percentage = sentiment_counts.get(1, 0)
        negative_percentage = sentiment_counts.get(0, 0)
        neutral_percentage = sentiment_counts.get(2, 0)
        summary = (f"Sentiment Summary:\n"
                   f"Positive: {positive_percentage:.2f}%\n"
                   f"Negative: {negative_percentage:.2f}%\n"
                   f"Neutral: {neutral_percentage:.2f}%")

        return summary, histogram_path, wordcloud_path

    except Exception as e:
        # Surface the failure in the text box; the image outputs still need
        # placeholders so Gradio receives all three expected values.
        return f"Error processing the file: {str(e)}", None, None


# --- Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Sentiment Analysis Chatbot")
    gr.Markdown("Please upload a CSV file with 'cleaned_text' and 'sentiment_label' columns.")

    file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
    output_text = gr.Textbox(label="Message", lines=5)
    output_histogram = gr.Image(label="Sentiment Histogram")
    output_wordcloud = gr.Image(label="Word Cloud")

    analyze_button = gr.Button("Analyze Sentiment")
    analyze_button.click(
        analyze_sentiment,
        inputs=file_input,
        outputs=[output_text, output_histogram, output_wordcloud],
    )

demo.launch()