"""Train two sentiment classifiers on review text and serve them with Gradio.

Running this script reads the review dataset, fits a k-nearest-neighbours
model and a logistic-regression model on bag-of-words counts, saves both
fitted models to disk and launches a small web UI for trying them out.
"""

import gradio as gr
import joblib
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the dataset
data_df = pd.read_csv('homework01_text_data_group13.csv')

# Separate features and labels
X = data_df['reviews']
y = data_df['class']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Create bag-of-words representations. The vocabulary is learned from the
# training split only; the test split is merely transformed with it.
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

# Train the KNN model
knn_model = KNeighborsClassifier(n_neighbors=2, metric='euclidean')
knn_model.fit(X_train_counts, y_train)

# Train the Logistic Regression model
logistic_model = LogisticRegression(penalty='l2', C=1, random_state=0)
logistic_model.fit(X_train_counts, y_train)

# Save the trained models
# NOTE(review): the fitted ``vectorizer`` is not saved here, yet both models
# were trained on its output -- anyone loading these pickles needs the same
# fitted vectorizer to score raw text. Consider persisting it as well.
joblib.dump(knn_model, 'best_knn_model.pkl')
joblib.dump(logistic_model, 'best_logistic_regression_model.pkl')


def _class_probabilities(review_text, model):
    """Return the class probabilities ``model`` assigns to one review.

    Args:
        review_text: Raw review text; ``None`` is treated as an empty review.
        model: A fitted classifier exposing ``predict_proba``.

    Returns:
        A dict with the keys ``'Positive'`` and ``'Negative'`` mapped to
        float probabilities.
    """
    # Treat a missing review as empty text so the vectorizer always
    # receives a string.
    text = "" if review_text is None else review_text
    features = vectorizer.transform([text])
    probabilities = model.predict_proba(features)[0]
    # predict_proba columns follow ``model.classes_``.
    # NOTE(review): indices 0 and 1 assume the negative label sorts before
    # the positive one in the 'class' column -- confirm against the dataset.
    return {
        'Positive': float(probabilities[1]),
        'Negative': float(probabilities[0]),
    }


def predict_knn(review_text, model=knn_model):
    """Score ``review_text`` with the KNN model (or a substitute ``model``).

    Returns:
        A dict mapping ``'Positive'`` and ``'Negative'`` to probabilities.
    """
    return _class_probabilities(review_text, model)


def predict_logistic(review_text, model=logistic_model):
    """Score ``review_text`` with the logistic-regression model.

    A different fitted classifier may be supplied through ``model``.

    Returns:
        A dict mapping ``'Positive'`` and ``'Negative'`` to probabilities.
    """
    return _class_probabilities(review_text, model)


models = ["KNN", "Logistic Regression"]


def predict(review_text, model):
    """Classify a review with the model chosen in the UI.

    Args:
        review_text: The review comment entered by the user.
        model: ``"KNN"`` selects the KNN model; any other value selects
            logistic regression.

    Returns:
        A ``(sentiment, probabilities)`` tuple: the sentiment label string
        and the dict of per-class probabilities it was derived from.
    """
    if model == "KNN":
        output = predict_knn(review_text)
    else:
        output = predict_logistic(review_text)

    # A tie (possible with two KNN neighbours) is reported as negative
    # because the comparison is strict.
    if output['Positive'] > output['Negative']:
        sentiment = "Positive Feedback"
    else:
        sentiment = "Negative Feedback"

    return sentiment, output


demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(
            lines=2,
            placeholder="Enter your review comment...",
            label="Review Comment",
        ),
        gr.Dropdown(choices=models, label="Select Model"),
    ],
    outputs=[
        gr.Textbox(label="Predicted Sentiment Class"),
        gr.Label(num_top_classes=2, label="Predicted Probability"),
    ],
    examples=[
        ["This Food is interesting, I need a second Plate", "KNN"],
        ["This Food is interesting, I need a second Plate", "Logistic Regression"],
        ["The food was terrible, and the service was worse.", "KNN"],
        ["The food was terrible, and the service was worse.", "Logistic Regression"],
    ],
)

demo.launch()