import gradio as gr
import joblib
import re
import nltk
import pandas as pd
from nltk.corpus import stopwords
import plotly.express as px

# Fetch the NLTK "stopwords" corpus so that stopwords.words() below can read it.
# This runs every time the module is executed.
nltk.download("stopwords")

# Load the persisted classifier and its matching text vectorizer from the
# current working directory.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so these two
# files must only ever come from a trusted source.
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# English stop words held in a set for O(1) membership tests in clean_text().
stop_words = set(stopwords.words("english"))

# Clean text
def clean_text(text):
    """Normalise raw review text before it is vectorized.

    The text is lower-cased, every character that is not ``a``-``z`` or
    whitespace is removed, and the remaining whitespace-separated tokens
    are filtered against the module-level ``stop_words`` set.

    Args:
        text: The raw review string.

    Returns:
        The surviving tokens joined by single spaces (an empty string when
        nothing survives).
    """
    letters_only = re.sub(r"[^a-z\s]", "", text.lower())
    return " ".join(
        token for token in letters_only.split() if token not in stop_words
    )


# Predict function
def predict_sentiment(review):
    """Classify a single review and report the class probabilities.

    Args:
        review: The raw review text.

    Returns:
        A ``(label, frame)`` tuple. ``label`` is ``"🟢 Positive"`` when the
        model predicts ``1`` and ``"🔴 Negative"`` otherwise. ``frame`` is a
        two-row DataFrame with ``Sentiment`` and ``Probability`` columns,
        where the probabilities are percentages rounded to two decimals.
    """
    features = vectorizer.transform([clean_text(review)])

    label = model.predict(features)[0]
    class_probs = model.predict_proba(features)[0]

    # NOTE(review): assumes predict_proba column 0 is the negative class and
    # column 1 the positive class — confirm against the trained model.
    pct_negative = round(class_probs[0] * 100, 2)
    pct_positive = round(class_probs[1] * 100, 2)

    verdict = "🟢 Positive" if label == 1 else "🔴 Negative"

    confidence = pd.DataFrame(
        {
            "Sentiment": ["Positive", "Negative"],
            "Probability": [pct_positive, pct_negative],
        }
    )
    return verdict, confidence


# Batch review analysis
def analyze_multiple_reviews(text):
    """Classify several reviews at once and chart the sentiment distribution.

    Args:
        text: Newline-separated reviews. Blank (or whitespace-only) lines are
            ignored and each remaining line is stripped. ``None`` is treated
            like an empty string.

    Returns:
        A ``(table, figure)`` tuple. ``table`` is a DataFrame with ``Review``
        and ``Sentiment`` columns, one row per non-blank input line.
        ``figure`` is a Plotly bar chart of the Positive/Negative counts.
        When there are no reviews the result is ``(empty DataFrame, None)``.
    """
    reviews = [
        line.strip() for line in (text or "").split("\n") if line.strip()
    ]

    if not reviews:
        return pd.DataFrame(), None

    # Vectorize and classify the whole batch in one call rather than
    # invoking the vectorizer and the model once per review.
    features = vectorizer.transform([clean_text(r) for r in reviews])
    predictions = [
        "Positive" if label == 1 else "Negative"
        for label in model.predict(features)
    ]

    df = pd.DataFrame({"Review": reviews, "Sentiment": predictions})

    # Both categories are always listed so the chart keeps a (zero-height)
    # bar for a sentiment that did not occur in this batch.
    chart_df = pd.DataFrame(
        {
            "Sentiment": ["Positive", "Negative"],
            "Count": [
                predictions.count("Positive"),
                predictions.count("Negative"),
            ],
        }
    )

    fig = px.bar(
        chart_df,
        x="Sentiment",
        y="Count",
        color="Sentiment",
        text="Count",
    )
    fig.update_layout(title="Sentiment Distribution")

    return df, fig


# UI Layout
with gr.Blocks(theme=gr.themes.Soft()) as app:

    # Page header. The Markdown source is kept verbatim.
    gr.Markdown(
        """
    # 🛍️ Amazon Review Sentiment Analyzer
    
    This AI system analyzes customer reviews and predicts sentiment using **TF-IDF + Logistic Regression**.
    """
    )

    # Tab 1: classify one review and plot the per-class confidence.
    with gr.Tab("Single Review Analysis"):

        review_input = gr.Textbox(lines=4, placeholder="Enter a customer review...")
        analyze_button = gr.Button("Analyze Sentiment")
        sentiment_output = gr.Textbox(label="Prediction")

        # The x/y names match the columns of the DataFrame that
        # predict_sentiment returns.
        probability_chart = gr.BarPlot(
            x="Sentiment", y="Probability", title="Sentiment Confidence"
        )

        analyze_button.click(
            predict_sentiment,
            inputs=review_input,
            outputs=[sentiment_output, probability_chart],
        )

    # Tab 2: classify many reviews (one per line) and plot the distribution.
    with gr.Tab("Bulk Review Analysis"):

        bulk_input = gr.Textbox(
            lines=10, placeholder="Enter multiple reviews (one per line)"
        )
        bulk_button = gr.Button("Analyze Reviews")
        results_table = gr.Dataframe()
        sentiment_chart = gr.Plot(label="Sentiment Distribution")

        bulk_button.click(
            analyze_multiple_reviews,
            inputs=bulk_input,
            outputs=[results_table, sentiment_chart],
        )


# Start the Gradio server for the app defined above.
app.launch()