File size: 2,191 Bytes
677d54d
 
 
 
 
 
 
 
 
 
 
93dc562
677d54d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

# Step 1: Load and preprocess the data
# Example data loading (replace this with your actual data)
data = pd.read_csv("flipkart_reviews.csv")

# pandas reads empty CSV cells as NaN. A NaN review is a float, which has no
# .lower() for the text cleaning step, and a NaN rating would put NaN into the
# feature matrix. Rows missing either field are therefore dropped, and the
# index is rebuilt so row positions stay contiguous.
data = data.dropna(subset=['Review', 'Rating']).reset_index(drop=True)

# Preprocessing function
def preprocess_text(text):
    """Normalise a review: lowercase it and strip punctuation and symbols.

    Args:
        text: The raw review. Normally a ``str``. ``None`` and float NaN
            (what pandas yields for an empty CSV cell) are treated as an
            empty review; any other non-string value is converted with
            ``str()`` before cleaning.

    Returns:
        The lowercased text with every character that is neither
        alphanumeric nor whitespace removed. Whitespace is kept as-is.
    """
    # NaN is the only float that compares unequal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    lowered = str(text).lower()
    return ''.join(ch for ch in lowered if ch.isalnum() or ch.isspace())

# Clean every review in place before it is vectorised.
data['Review'] = data['Review'].apply(preprocess_text)

# Step 2: Feature Engineering
# Learn the TF-IDF vocabulary from the cleaned reviews; fit_transform returns
# a sparse matrix with one row per review.
tfidf = TfidfVectorizer()
X_text = tfidf.fit_transform(data['Review'])

# Combine TF-IDF features with the rating
# The rating becomes one extra trailing column. The stacking is done in sparse
# form: X_text.toarray() would allocate a dense (rows x vocabulary) array,
# which grows out of memory quickly on a real review corpus. CSR output is
# requested explicitly so the result can be indexed by row when it is split.
rating_column = sparse.csr_matrix(
    data['Rating'].values.reshape(-1, 1), dtype=np.float64
)
X = sparse.hstack([X_text, rating_column], format='csr')

# Define the five sentiment categories
def categorize_sentiment(rating):
    """Map a star rating to one of the five sentiment labels.

    Args:
        rating: The review's rating. Values equal to 1, 2, 3 or 4 each have
            their own label.

    Returns:
        The label string (a word followed by an emoji). Any rating that is
        not equal to 1, 2, 3 or 4 -- including 5 -- yields ``'Best 😍'``.
    """
    labelled_ratings = (
        (1, 'Worst 😑'),
        (2, 'Poor 😟'),
        # The emoji here was previously stored as mis-decoded UTF-8 bytes.
        (3, 'Good 🙂'),
        (4, 'Better 😊'),
    )
    for stars, label in labelled_ratings:
        if rating == stars:
            return label
    # Fall-through kept for compatibility: everything else counts as 'Best'.
    return 'Best 😍'

# Targets: the sentiment label derived from each row's rating.
# NOTE(review): the labels are a pure function of Rating, and Rating is also
# one of the columns of X -- confirm this is intended.
y = data['Rating'].apply(categorize_sentiment)

# Step 3: Model Training
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression().fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Step 4: Create Gradio Interface
def predict_sentiment(review, rating):
    """Predict the sentiment label for one review/rating pair.

    The review is cleaned with ``preprocess_text`` and vectorised with the
    module-level ``tfidf``; the rating is appended as one extra column before
    the single-row matrix is handed to the module-level ``model``.

    Args:
        review: The review text.
        rating: The rating value that accompanies the review.

    Returns:
        The first (and only) element of the model's prediction for that row.
    """
    cleaned = preprocess_text(review)
    text_features = tfidf.transform([cleaned]).toarray()
    # TF-IDF columns first, rating last.
    row = np.hstack((text_features, [[rating]]))
    return model.predict(row)[0]

# Input widgets, in the order predict_sentiment expects its arguments:
# free-text review first, then the 1-5 rating slider.
review_box = gr.Textbox(lines=2, placeholder="Enter your review here...")
rating_slider = gr.Slider(1, 5, step=1, label="Rating")

# Wire the widgets to the prediction function; the result is shown as text.
interface = gr.Interface(
    fn=predict_sentiment,
    inputs=[review_box, rating_slider],
    outputs="text",
    title="Flipkart Review Sentiment Analysis",
    description="Enter your Flipkart review and rating to predict its sentiment.",
)

# Launch the interface
interface.launch(inline=False)