"""Gradio app that classifies customer reviews as positive or negative.

A pre-trained model and its text vectorizer are loaded from disk at import
time. The UI offers two tabs: single-review analysis with a confidence
chart, and bulk analysis (one review per line) with a distribution chart.
"""

import re

import gradio as gr
import joblib
import nltk
import pandas as pd
import plotly.express as px
from nltk.corpus import stopwords

nltk.download("stopwords")

# Load model
# NOTE: joblib artifacts are pickles -- only load files from a trusted source.
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

stop_words = set(stopwords.words("english"))

# Matches every character that is neither a lowercase ASCII letter nor
# whitespace; compiled once so clean_text does not re-resolve it per call.
_NON_LETTER_RE = re.compile(r"[^a-z\s]")


# Clean text
def clean_text(text):
    """Normalize a raw review before vectorization.

    Lowercases the text, deletes every character that is not ``a-z`` or
    whitespace, and drops English stop words.

    Args:
        text: Raw review string.

    Returns:
        The remaining words joined by single spaces (may be empty).
    """
    letters_only = _NON_LETTER_RE.sub("", text.lower())
    return " ".join(w for w in letters_only.split() if w not in stop_words)


# Predict function
def predict_sentiment(review):
    """Classify a single review and report class probabilities.

    Args:
        review: Raw review text from the UI textbox.

    Returns:
        A ``(label, data)`` tuple: ``label`` is the display string for the
        predicted class and ``data`` is a DataFrame with ``Sentiment`` and
        ``Probability`` (percent, rounded to 2 decimals) columns.
    """
    review_clean = clean_text(review)
    vector = vectorizer.transform([review_clean])

    prediction = model.predict(vector)[0]
    probability = model.predict_proba(vector)[0]

    # Assumes predict_proba columns are ordered [negative (0), positive (1)],
    # i.e. model.classes_ == [0, 1] -- TODO confirm against the trained model.
    positive_prob = round(probability[1] * 100, 2)
    negative_prob = round(probability[0] * 100, 2)

    sentiment = "🟢 Positive" if prediction == 1 else "🔴 Negative"

    data = pd.DataFrame({
        "Sentiment": ["Positive", "Negative"],
        "Probability": [positive_prob, negative_prob],
    })

    return sentiment, data


# Batch review analysis
def analyze_multiple_reviews(text):
    """Classify several reviews at once and chart the label distribution.

    Args:
        text: Multi-line string with one review per line. Blank lines are
            ignored and surrounding whitespace is stripped.

    Returns:
        A ``(table, figure)`` tuple: ``table`` is a DataFrame with ``Review``
        and ``Sentiment`` columns, ``figure`` is a Plotly bar chart of the
        positive/negative counts. When no non-blank review is supplied, an
        empty DataFrame and ``None`` are returned.
    """
    reviews = [s for s in (r.strip() for r in text.split("\n")) if s]

    if not reviews:
        return pd.DataFrame(), None

    # Vectorize and predict the whole batch in one call instead of once per
    # review; each row is still scored independently.
    cleaned = [clean_text(r) for r in reviews]
    labels = model.predict(vectorizer.transform(cleaned))
    predictions = [
        "Positive" if label == 1 else "Negative" for label in labels
    ]

    df = pd.DataFrame({
        "Review": reviews,
        "Sentiment": predictions,
    })

    # Always emit both categories (even with a zero count) so the chart keeps
    # a stable pair of bars.
    chart_df = pd.DataFrame([
        {"Sentiment": "Positive", "Count": predictions.count("Positive")},
        {"Sentiment": "Negative", "Count": predictions.count("Negative")},
    ])

    # Create Plotly chart
    fig = px.bar(
        chart_df,
        x="Sentiment",
        y="Count",
        color="Sentiment",
        text="Count",
    )
    fig.update_layout(title="Sentiment Distribution")

    return df, fig


# UI Layout
with gr.Blocks(theme=gr.themes.Soft()) as app:

    gr.Markdown(
        """
        # 🛍️ Amazon Review Sentiment Analyzer
        This AI system analyzes customer reviews and predicts sentiment using **TF-IDF + Logistic Regression**.
        """
    )

    with gr.Tab("Single Review Analysis"):
        review_input = gr.Textbox(
            lines=4,
            placeholder="Enter a customer review..."
        )
        analyze_button = gr.Button("Analyze Sentiment")
        sentiment_output = gr.Textbox(label="Prediction")
        probability_chart = gr.BarPlot(
            x="Sentiment",
            y="Probability",
            title="Sentiment Confidence"
        )
        analyze_button.click(
            predict_sentiment,
            inputs=review_input,
            outputs=[sentiment_output, probability_chart]
        )

    with gr.Tab("Bulk Review Analysis"):
        bulk_input = gr.Textbox(
            lines=10,
            placeholder="Enter multiple reviews (one per line)"
        )
        bulk_button = gr.Button("Analyze Reviews")
        results_table = gr.Dataframe()
        sentiment_chart = gr.Plot(label="Sentiment Distribution")
        bulk_button.click(
            analyze_multiple_reviews,
            inputs=bulk_input,
            outputs=[results_table, sentiment_chart]
        )

app.launch()