File size: 3,967 Bytes
16e1672
 
 
 
45e3595
4f06213
6ddb7d7
16e1672
4f06213
16e1672
45e3595
16e1672
 
 
4f06213
16e1672
45e3595
16e1672
4f06213
16e1672
4f06213
 
16e1672
 
4f06213
16e1672
 
 
45e3595
4f06213
45e3595
4f06213
45e3595
4f06213
45e3595
4f06213
45e3595
4f06213
45e3595
 
 
4f06213
 
45e3595
16e1672
45e3595
4f06213
45e3595
 
 
 
4f06213
45e3595
4f06213
45e3595
 
 
 
5b2215c
45e3595
6ddb7d7
 
 
8aefc78
45e3595
 
 
 
 
 
5b2215c
45e3595
 
8aefc78
5b2215c
45e3595
 
6ddb7d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b2215c
 
 
 
 
 
8aefc78
45e3595
 
8aefc78
 
 
 
45e3595
 
 
 
 
 
 
 
 
 
4f06213
45e3595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ddb7d7
 
45e3595
6ddb7d7
 
 
45e3595
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import gradio as gr
import joblib
import re
import nltk
import pandas as pd
from nltk.corpus import stopwords
import plotly.express as px

# Make sure the NLTK stopword corpus is available before stopwords.words()
# is called further down; this runs every time the module is imported.
nltk.download("stopwords")

# Load model
# Both artifacts are given as relative paths, so they are resolved against
# the process's current working directory.
# NOTE(review): joblib.load unpickles Python objects — only load artifacts
# that come from a trusted source.
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# English stopwords held as a set; clean_text tests membership against it.
stop_words = set(stopwords.words("english"))

# Clean text
def clean_text(text):
    """Normalize a review string before it is vectorized.

    The text is lower-cased, every character other than ``a``-``z`` and
    whitespace is deleted, and tokens found in the module-level
    ``stop_words`` set are dropped.

    Args:
        text: Raw review text.

    Returns:
        The surviving tokens joined by single spaces.
    """
    lowered = text.lower()
    letters_only = re.sub(r"[^a-z\s]", "", lowered)
    kept_tokens = (
        token for token in letters_only.split() if token not in stop_words
    )
    return " ".join(kept_tokens)


# Predict function
def predict_sentiment(review):
    """Classify a single review and report the class probabilities.

    Args:
        review: Raw review text as entered by the user.

    Returns:
        A ``(label, chart_data)`` tuple. ``label`` is ``"🟢 Positive"`` or
        ``"🔴 Negative"``. ``chart_data`` is a DataFrame with a
        ``Sentiment`` column and a ``Probability`` column holding
        percentages rounded to two decimals. When ``review`` is missing or
        contains only whitespace, a prompt message and ``None`` are
        returned instead, so no prediction is shown for empty input.
    """
    # Guard: with nothing to classify, the model would otherwise be fed an
    # empty document and its output displayed as a real prediction.
    if not review or not review.strip():
        return "Please enter a review to analyze.", None

    features = vectorizer.transform([clean_text(review)])

    prediction = model.predict(features)[0]
    probability = model.predict_proba(features)[0]

    # NOTE(review): assumes the classifier's classes are ordered
    # [negative, positive] and that label 1 means positive — confirm
    # against the training code.
    positive_prob = round(probability[1] * 100, 2)
    negative_prob = round(probability[0] * 100, 2)

    sentiment = "🟢 Positive" if prediction == 1 else "🔴 Negative"

    data = pd.DataFrame({
        "Sentiment": ["Positive", "Negative"],
        "Probability": [positive_prob, negative_prob],
    })

    return sentiment, data


# Batch review analysis
def analyze_multiple_reviews(text):
    """Classify each non-blank line of *text* and chart the label counts.

    Args:
        text: Newline-separated reviews, one per line. Lines that are blank
            after stripping are ignored. ``None`` is treated as no input.

    Returns:
        A ``(results, figure)`` tuple. ``results`` is a DataFrame with
        ``Review`` and ``Sentiment`` columns, one row per review.
        ``figure`` is a Plotly bar chart of the Positive/Negative counts,
        titled "Sentiment Distribution". When there are no reviews, an
        empty DataFrame and ``None`` are returned.
    """
    reviews = [
        line.strip() for line in (text or "").split("\n") if line.strip()
    ]

    if not reviews:
        return pd.DataFrame(), None

    # Vectorize and classify the whole batch in one call instead of one
    # model round-trip per review.
    features = vectorizer.transform([clean_text(r) for r in reviews])
    # NOTE(review): label 1 is treated as positive — confirm against the
    # training code.
    predictions = [
        "Positive" if label == 1 else "Negative"
        for label in model.predict(features)
    ]

    df = pd.DataFrame({
        "Review": reviews,
        "Sentiment": predictions,
    })

    # Always emit both categories, so a class with zero reviews still gets
    # a (zero-height) bar in the chart.
    chart_df = pd.DataFrame({
        "Sentiment": ["Positive", "Negative"],
        "Count": [
            predictions.count("Positive"),
            predictions.count("Negative"),
        ],
    })

    fig = px.bar(
        chart_df,
        x="Sentiment",
        y="Count",
        color="Sentiment",
        text="Count",
    )
    fig.update_layout(title="Sentiment Distribution")

    return df, fig


# UI Layout
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Page header rendered above both tabs.
    gr.Markdown(
    """
    # 🛍️ Amazon Review Sentiment Analyzer
    
    This AI system analyzes customer reviews and predicts sentiment using **TF-IDF + Logistic Regression**.
    """
    )

    # Tab 1: one review in; predicted label and a probability bar plot out.
    with gr.Tab("Single Review Analysis"):
        review_input = gr.Textbox(lines=4, placeholder="Enter a customer review...")
        analyze_button = gr.Button("Analyze Sentiment")
        sentiment_output = gr.Textbox(label="Prediction")
        probability_chart = gr.BarPlot(x="Sentiment", y="Probability", title="Sentiment Confidence")

        analyze_button.click(
            fn=predict_sentiment,
            inputs=review_input,
            outputs=[sentiment_output, probability_chart],
        )

    # Tab 2: many reviews in (one per line); per-review table and a
    # distribution chart out.
    with gr.Tab("Bulk Review Analysis"):
        bulk_input = gr.Textbox(lines=10, placeholder="Enter multiple reviews (one per line)")
        bulk_button = gr.Button("Analyze Reviews")
        results_table = gr.Dataframe()
        sentiment_chart = gr.Plot(label="Sentiment Distribution")

        bulk_button.click(
            fn=analyze_multiple_reviews,
            inputs=bulk_input,
            outputs=[results_table, sentiment_chart],
        )


# Start the Gradio server for the interface built above.
app.launch()