import gradio as gr
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.sparse import hstack as sparse_hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

# Step 1: Load and preprocess the data
# Example data loading (replace this with your actual data)
data = pd.read_csv("flipkart_reviews.csv")

# A row without a rating cannot be labelled, and a NaN in the feature matrix
# makes the classifier's fit() fail, so such rows are dropped up front.
data = data.dropna(subset=['Rating'])


# Preprocessing function
def preprocess_text(text):
    """Lower-case *text* and strip everything except alphanumerics/whitespace.

    Missing values (``None``/NaN) become the empty string and other
    non-string values are converted with ``str`` first, so a CSV with blank
    or numeric review cells does not raise ``AttributeError``.
    """
    if not isinstance(text, str):
        text = "" if pd.isna(text) else str(text)
    lowered = text.lower()
    return ''.join(ch for ch in lowered if ch.isalnum() or ch.isspace())


def _combine_features(text_features, ratings):
    """Append the ratings as one extra, right-most column to a TF-IDF matrix.

    The result stays sparse (CSR): densifying the TF-IDF matrix only to add a
    single column costs memory proportional to rows x vocabulary size.
    """
    rating_column = csr_matrix(
        np.asarray(ratings, dtype=float).reshape(-1, 1)
    )
    return sparse_hstack([text_features, rating_column], format='csr')


data['Review'] = data['Review'].apply(preprocess_text)

# Step 2: Feature Engineering
# NOTE(review): the vectorizer is fitted on the full dataset before the
# train/test split below, so test-set vocabulary/IDF statistics leak into
# training. Confirm whether the reported accuracy is meant to be a clean
# hold-out estimate.
tfidf = TfidfVectorizer()
X_text = tfidf.fit_transform(data['Review'])

# Combine TF-IDF features with the rating
X = _combine_features(X_text, data['Rating'].values)

# Define the five sentiment categories
_SENTIMENT_BY_RATING = {
    1: 'Worst 😡',
    2: 'Poor 😟',
    3: 'Good 🙂',
    4: 'Better 😊',
}


def categorize_sentiment(rating):
    """Map a star rating to its sentiment label.

    Ratings 1-4 have dedicated labels; any other value falls through to
    'Best 😍'.
    """
    return _SENTIMENT_BY_RATING.get(rating, 'Best 😍')


# NOTE(review): the label is a deterministic function of Rating, and Rating is
# also one of the model's input features, so the classifier can recover the
# label from that single column. Confirm this is intended.
y = data['Rating'].apply(categorize_sentiment)

# Step 3: Model Training
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The features mix TF-IDF weights in [0, 1] with a raw 1-5 rating; the default
# max_iter=100 can stop before the solver converges on such unscaled input.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))


# Step 4: Create Gradio Interface
def predict_sentiment(review, rating):
    """Predict the sentiment label for one review/rating pair.

    The review is cleaned with ``preprocess_text`` and vectorized with the
    already-fitted ``tfidf``; the rating is appended as the last feature
    column, matching the layout the model was trained on.

    Returns the first (only) element of ``model.predict``.
    """
    cleaned = preprocess_text(review)
    review_tfidf = tfidf.transform([cleaned])
    features = _combine_features(review_tfidf, [rating])
    sentiment = model.predict(features)
    return sentiment[0]


interface = gr.Interface(
    fn=predict_sentiment,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your review here..."),
        gr.Slider(1, 5, step=1, label="Rating"),
    ],
    outputs="text",
    title="Flipkart Review Sentiment Analysis",
    description="Enter your Flipkart review and rating to predict its sentiment.",
)

# Launch the interface
interface.launch(inline=False)