File size: 2,734 Bytes
d48676d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
import joblib
import gradio as gr

# Load the dataset
# The path is relative, so the CSV is looked up from the working directory
# the script is started in.
data_df = pd.read_csv('homework01_text_data_group13.csv')

# Separate features and labels
# The 'reviews' column is the model input and the 'class' column is the target.
# NOTE(review): presumably 'reviews' holds raw text strings and 'class' a
# binary label -- confirm against the CSV.
X = data_df['reviews']
y = data_df['class']

# Split the data into train and test sets
# 33% of the rows are held out for testing; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Create bag-of-words representations
# The vocabulary is fitted on the training split only; the test split is then
# encoded with that same vocabulary.
# NOTE(review): X_test_counts is not read again anywhere in the visible code.
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

# Train the KNN model
# Uses the 2 nearest training reviews under Euclidean distance on the count
# vectors.
knn_model = KNeighborsClassifier(n_neighbors=2, metric='euclidean')
knn_model.fit(X_train_counts, y_train)

# Train the Logistic Regression model
# L2-penalised with C=1; random_state is pinned to 0.
logistic_model = LogisticRegression(penalty='l2', C=1, random_state=0)
logistic_model.fit(X_train_counts, y_train)

# Save the trained models
# Both classifiers are written to the working directory on every run.
# NOTE(review): the fitted vectorizer is not saved here, so anything loading
# these pickles elsewhere presumably needs the same vocabulary -- confirm.
joblib.dump(knn_model, 'best_knn_model.pkl')
joblib.dump(logistic_model, 'best_logistic_regression_model.pkl')

def predict_knn(review_text, model=knn_model):
    """Return the KNN class probabilities for a single review.

    Args:
        review_text: The review to score. It is wrapped in a one-element
            list and encoded with the module-level ``vectorizer``.
        model: Fitted classifier exposing ``predict_proba``. Defaults to the
            module-level ``knn_model``.

    Returns:
        A dict with the keys ``'Positive'`` and ``'Negative'``, holding
        column 1 and column 0 of the ``predict_proba`` row respectively.
    """
    features = vectorizer.transform([review_text])
    # Only the probabilities are used, so the separate model.predict() call
    # (a second neighbour search whose result was discarded) is dropped.
    proba = model.predict_proba(features)[0]
    # NOTE(review): assumes model.classes_ orders the negative label first
    # and the positive label second -- confirm against the 'class' column.
    return {'Positive': proba[1], 'Negative': proba[0]}

def predict_logistic(review_text, model=logistic_model):
    """Return the logistic-regression class probabilities for a single review.

    Args:
        review_text: The review to score. It is wrapped in a one-element
            list and encoded with the module-level ``vectorizer``.
        model: Fitted classifier exposing ``predict_proba``. Defaults to the
            module-level ``logistic_model``.

    Returns:
        A dict with the keys ``'Positive'`` and ``'Negative'``, holding
        column 1 and column 0 of the ``predict_proba`` row respectively.
    """
    features = vectorizer.transform([review_text])
    # Only the probabilities are used, so the separate model.predict() call
    # (whose result was discarded) is dropped.
    proba = model.predict_proba(features)[0]
    # NOTE(review): assumes model.classes_ orders the negative label first
    # and the positive label second -- confirm against the 'class' column.
    return {'Positive': proba[1], 'Negative': proba[0]}

models = ["KNN", "Logistic Regression"]


def predict(review_text, model):
    """Score a review with the selected model and label its sentiment.

    Args:
        review_text: The review text to classify.
        model: Name of the model to use. "KNN" selects the KNN classifier;
            any other value selects logistic regression.

    Returns:
        A ``(sentiment, output)`` tuple. ``sentiment`` is
        "Positive Feedback" when the positive probability is strictly
        greater than the negative one, otherwise "Negative Feedback".
        ``output`` is the probability dict returned by the chosen scorer.
    """
    scorer = predict_knn if model == "KNN" else predict_logistic
    output = scorer(review_text)

    # A tie is reported as negative because the comparison is strict.
    leans_positive = output['Positive'] > output['Negative']
    sentiment = "Positive Feedback" if leans_positive else "Negative Feedback"
    return sentiment, output

# Input widgets: the free-text review and the model selector.
_review_box = gr.Textbox(lines=2, placeholder="Enter your review comment...", label="Review Comment")
_model_picker = gr.Dropdown(choices=models, label="Select Model")

# Output widgets: the sentiment string and the per-class probabilities.
_sentiment_box = gr.Textbox(label="Predicted Sentiment Class")
_probability_label = gr.Label(num_top_classes=2, label="Predicted Probability")

# Each sample review is offered once per model, review-major, so every
# review appears first with "KNN" and then with "Logistic Regression".
_sample_reviews = [
    "This Food is interesting, I need a second Plate",
    "The food was terrible, and the service was worse.",
]
_example_rows = [
    [sample, model_name]
    for sample in _sample_reviews
    for model_name in models
]

demo = gr.Interface(
    fn=predict,
    inputs=[_review_box, _model_picker],
    outputs=[_sentiment_box, _probability_label],
    examples=_example_rows,
)

demo.launch()