import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE: wildcard import kept in its original position so the explicit imports
# below still take precedence over any same-named symbols it exports.
from utils.data_helper import *
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, balanced_accuracy_score
import os
from sklearn.metrics import balanced_accuracy_score as bac


def pred(model, x_test, y_test):
    """Threshold the model's outputs at 0.5 and score them against ``y_test``.

    Each row returned by ``model.predict(x_test)`` is turned into a label:
    1 (reported as "fail") when its first element is >= 0.5, else 0
    (reported as "pass").

    Args:
        model: Object exposing ``predict(x)``; every returned row must be
            indexable so that ``row[0]`` is the score.
        x_test: Input forwarded unchanged to ``model.predict``.
        y_test: Ground-truth 0/1 labels (list, Series or array).

    Returns:
        The balanced accuracy score, or ``None`` when the number of
        predictions differs from the number of labels.
    """
    raw_scores = model.predict(x_test)
    y_pred = np.array([1 if row[0] >= 0.5 else 0 for row in raw_scores])
    # Coerce once so the boolean masks below also work for plain lists.
    y_true = np.asarray(y_test)
    print('num students:', len(y_pred))

    # The per-class breakdown indexes y_pred with masks built from y_true, so
    # it is only meaningful (and only safe) when both have the same length.
    if len(y_pred) != len(y_true):
        return None

    score = bac(y_true, y_pred)
    print('BAC score:', score)

    print(f'num fail: {np.sum(y_pred[y_true == 1])}/{np.sum(y_true == 1)}')
    print(f'num pass: {np.sum(y_pred[y_true == 0] == 0)}/{np.sum(y_true == 0)}')
    return score


def pred_pass_fail(model, x_test, label=None):
    """Predict pass/fail labels for ``x_test`` and summarise the counts.

    Each row returned by ``model.predict(x_test)`` becomes label 1 ("fail")
    when its first element is >= 0.5, otherwise label 0 ("pass").

    Args:
        model: Object exposing ``predict(x)``; every returned row must be
            indexable so that ``row[0]`` is the score.
        x_test: Input forwarded unchanged to ``model.predict``.
        label: Optional label shared by every sample in ``x_test``. With 0
            the score is the fraction predicted pass, with 1 the fraction
            predicted fail. Any other value leaves the score as ``None``.

    Returns:
        dict with keys ``'score'``, ``'num fail'`` and ``'num pass'``.
        ``'score'`` is ``None`` when no usable ``label`` is given or when
        there are no predictions to score.
    """
    raw_scores = model.predict(x_test)
    y_pred = np.array([1 if row[0] >= 0.5 else 0 for row in raw_scores])

    # Count once and reuse for both the report and the returned summary.
    total = len(y_pred)
    num_fail = int(np.sum(y_pred == 1))
    num_pass = int(np.sum(y_pred == 0))

    print('num students:', total)
    print(f'num fail: {num_fail}/{total}')
    print(f'num pass: {num_pass}/{total}')

    score = None
    # Guard against an empty batch: a ratio over zero students is undefined.
    if total:
        if label == 0:
            score = num_pass / total
        elif label == 1:
            score = num_fail / total
    print('Percentage correct: ', score)
    return {'score': score, 'num fail': num_fail, 'num pass': num_pass}


def pred_change_label(model, x_test_real, x_test_syn, y_test=None):
    """Count students whose predicted label flips between two inputs.

    Both inputs are run through ``model.predict`` and thresholded at 0.5 on
    the first element of each output row (1 = fail, 0 = pass). Sample ``i`` of
    ``x_test_real`` is compared with sample ``i`` of ``x_test_syn``.

    Args:
        model: Object exposing ``predict(x)`` returning indexable rows.
        x_test_real: Original inputs.
        x_test_syn: Modified inputs, aligned one-to-one with ``x_test_real``.
        y_test: Optional ground-truth 0/1 labels, only used for reporting.

    Returns:
        Tuple ``(pass_to_fail, fail_to_pass)`` with the number of students
        whose prediction changed in each direction.

    Raises:
        ValueError: If the two inputs yield different numbers of predictions.
    """
    y_pred_real = np.array([1 if row[0] >= 0.5 else 0 for row in model.predict(x_test_real)])
    y_pred_syn = np.array([1 if row[0] >= 0.5 else 0 for row in model.predict(x_test_syn)])
    if len(y_pred_real) != len(y_pred_syn):
        raise ValueError(
            'real and synthetic inputs must describe the same students: '
            f'got {len(y_pred_real)} and {len(y_pred_syn)} predictions'
        )

    # Number of students whose predicted label is unchanged.
    print(np.sum(y_pred_real == y_pred_syn))

    pass_to_fail = np.sum((y_pred_real == 0) & (y_pred_syn == 1))
    fail_to_pass = np.sum((y_pred_real == 1) & (y_pred_syn == 0))

    print('>>>>> Number of students predicted to be pass change to fail after introducing new content: ',
          pass_to_fail)

    print('>>>>> Number of students predicted to be fail change to pass after introducing new content: ',
          fail_to_pass)
    if y_test is not None:
        # Coerce so that element-wise comparison also works for plain lists.
        y_true = np.asarray(y_test)
        print(f'>>>>> Ground Truth, Num passing students: {np.sum(y_true == 0)}, Num failing students: {np.sum(y_true == 1)}')
    return pass_to_fail, fail_to_pass


def pred_change_percentage(model, x_test_real, x_test_syn, y_test=None):
    """Measure how much the raw model output shifts between two inputs.

    The shift is ``model.predict(x_test_syn) - model.predict(x_test_real)``.
    It is summarised over all students and over the students whose original
    prediction is "fail" (first output element >= 0.5).

    Args:
        model: Object exposing ``predict(x)`` returning indexable rows.
        x_test_real: Original inputs.
        x_test_syn: Modified inputs, aligned one-to-one with ``x_test_real``.
        y_test: Accepted for signature compatibility; not used.

    Returns:
        Tuple ``(mean_all, std_all, mean_low, std_low)``. The last two are
        NaN when no student is predicted to fail on the original inputs.
    """
    y_pred_real = np.asarray(model.predict(x_test_real))
    y_pred_syn = np.asarray(model.predict(x_test_syn))
    delta = y_pred_syn - y_pred_real

    # Boolean mask of predicted-fail students. Indexing with the 0/1 integer
    # labels instead would gather rows 0 and 1 over and over rather than
    # selecting the intended subset.
    low_mask = np.array([row[0] >= 0.5 for row in y_pred_real], dtype=bool)
    impact_low_performing = delta[low_mask]

    mean_all, std_all = np.mean(delta), np.std(delta)
    if low_mask.any():
        mean_low = np.mean(impact_low_performing)
        std_low = np.std(impact_low_performing)
    else:
        # Nothing to average; report NaN explicitly.
        mean_low = std_low = np.float64('nan')

    print('>>>>> New assignment impact on average students: ', mean_all, std_all)
    print('>>>>> New assignment impact on predicted-low-performing students', mean_low, std_low)
    return mean_all, std_all, mean_low, std_low


def first_trail_prediction(pass_reward, fail_reward, DATA_DIR="Y:/data/result/easy-fail/eq_week-marras_et_al-dsp_002",
                           size=np.array([3, 3, 4, 3, 2, 2, 3, 2]),
                           feature_list=["competency_strength", "competency_alignment", "competency_anticipation", "content_alignment",
                                         "content_anticipation", "student_speed", "student_shape"],
                           show=False):
    """Evaluate two reward tables on the students stored under ``DATA_DIR``.

    Loads ``feature_values.npz`` and ``feature_labels.csv`` from ``DATA_DIR``,
    splits students by the ``label-pass-fail`` column (0 = pass, 1 = fail),
    discretizes the features, converts them to trajectories and then writes a
    confusion matrix plus four reward histograms to
    ``results/<last component of DATA_DIR>``.

    Args:
        pass_reward: Reward table forwarded to ``plot_confusion_matrix`` and
            ``reward_each_student``.
        fail_reward: Second reward table, used the same way.
        DATA_DIR: Directory holding the two input files.
        size: Forwarded to ``World(size=...)``.
        feature_list: Feature names forwarded to ``discretized_feature``.
        show: When true, print the shapes of the pass and fail subsets.

    Returns:
        None. The results are the image files written to disk.
    """
    # Cluster counts handed to discretized_feature for every subset.
    clusters = (3, 3, 4, 3, 2, 2, 3)

    # NpzFile keeps the archive open until closed; the extracted array
    # remains valid after the context manager exits.
    with np.load(DATA_DIR + "/feature_values.npz") as archive:
        data = archive['feature_values']
    label_df = pd.read_csv(DATA_DIR + "/feature_labels.csv")
    y = np.array(label_df['label-pass-fail']).astype(int)

    # Log-transform the second-to-last feature in place.
    data[:, :, -2] = np.log(data[:, :, -2])
    pass_data = data[y == 0]
    fail_data = data[y == 1]
    if show:
        print('Student pass the course:', pass_data.shape)
        print('Student fail the course:', fail_data.shape)

    # Helper call order is kept as in the original in case they are stateful.
    filna_pass_data = dealing_missing_value(pass_data)
    filna_fail_data = dealing_missing_value(fail_data)
    filna_all_data = dealing_missing_value(data)

    discretized_all_data = discretized_feature(clusters_list=list(clusters), feature=filna_all_data, feature_list=feature_list)
    discretized_pass_data = discretized_feature(clusters_list=list(clusters), feature=filna_pass_data, feature_list=feature_list)
    discretized_fail_data = discretized_feature(clusters_list=list(clusters), feature=filna_fail_data, feature_list=feature_list)

    student_world = World(size=size, samples_trajectory=discretized_pass_data)
    trajectories_pass = feature_table_to_trajectories(student_world, discretized_pass_data)
    trajectories_fail = feature_table_to_trajectories(student_world, discretized_fail_data)
    trajectories_all = feature_table_to_trajectories(student_world, discretized_all_data)

    # normpath drops a trailing separator so the run name is never empty.
    run_name = os.path.basename(os.path.normpath(DATA_DIR))
    save_dir = f"results/{run_name}"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    plot_confusion_matrix(pass_reward, fail_reward, trajectories_all, y, save_dir)

    reward_each_student(pass_reward, trajectories_pass, 'Pass Reward, Pass Students', save_dir)
    reward_each_student(pass_reward, trajectories_fail, 'Pass Reward, Fail Students', save_dir)
    reward_each_student(fail_reward, trajectories_pass, 'Fail Reward, Pass Students', save_dir)
    reward_each_student(fail_reward, trajectories_fail, 'Fail Reward, Fail Students', save_dir)


def reward_each_student(reward, trajectories, title=None, path=None):
    """Sum the reward collected along each trajectory.

    For every step ``state`` of a trajectory the value ``reward[state[0]][0]``
    is added to that trajectory's total.

    Args:
        reward: Indexable table; ``reward[i][0]`` is the reward read for
            state index ``i``.
        trajectories: Iterable of trajectories, each an iterable of steps
            whose first element is the state index.
        title: Histogram title, also used as the image file name.
        path: Directory for the histogram image. When falsy no figure is
            drawn at all, so callers that only need the totals do not pay
            for (or leave behind) a plot.

    Returns:
        List with one accumulated reward per trajectory, in input order. An
        empty trajectory contributes ``0``.
    """
    totals = [
        sum(reward[state[0]][0] for state in trajectory)
        for trajectory in trajectories
    ]

    if path:
        sns.histplot(totals)
        plt.title(title)
        plt.savefig(path + f'/{title}.jpg')
        # Close so consecutive calls do not draw onto the same figure.
        plt.close()
    return totals


def plot_confusion_matrix(pass_reward, fail_reward, trajectories_all, y, path):
    """Print and plot the row-normalised confusion matrix of predicted labels.

    Args:
        pass_reward: First reward table, forwarded to ``pred_pass_fail``.
        fail_reward: Second reward table, forwarded to ``pred_pass_fail``.
        trajectories_all: Trajectories, forwarded to ``pred_pass_fail``.
        y: Ground-truth labels (0 is displayed as 'Pass', 1 as 'Fail').
        path: Directory in which ``confusion_matrix.jpg`` is written; when
            falsy nothing is saved.

    Returns:
        None.
    """
    # NOTE(review): the pred_pass_fail defined in this module takes
    # (model, x_test, label) and returns a dict, which does not match this
    # call (two reward tables plus trajectories, with per-student labels
    # expected back). Confirm which helper is intended here -- possibly one
    # from the utils.data_helper wildcard import that the local definition
    # shadows.
    pred_label = np.array(pred_pass_fail(pass_reward, fail_reward, trajectories_all))
    y_test = np.array(y)
    print(np.sum(pred_label == 0), np.sum(pred_label == 1))

    # Pin both classes so the matrix is always 2x2, even when one class is
    # absent from y or from the predictions; otherwise the unpacking below
    # fails.
    class_ids = [0, 1]
    tn, fp, fn, tp = confusion_matrix(y_test, pred_label, labels=class_ids, normalize='true').ravel()
    print('tn, fp, fn, tp:', tn, fp, fn, tp)
    ConfusionMatrixDisplay.from_predictions(
        y_test,
        pred_label,
        labels=class_ids,
        display_labels=['Pass', 'Fail'],
        cmap=plt.cm.Blues,
        normalize='true'
    )
    print('Balanced Accuracy Score:', balanced_accuracy_score(y_test, pred_label))
    if path:
        plt.savefig(path + '/confusion_matrix.jpg')
    # Always release the figure so repeated calls do not accumulate figures.
    plt.close()


def create_attributes(DATA_DIR, pass_reward, fail_reward,
                      size=np.array([3, 3, 4, 3, 2, 2, 3, 2]),
                      feature_list=["competency_strength", "competency_alignment", "competency_anticipation", "content_alignment",
                                    "content_anticipation", "student_speed", "student_shape"],
                      show=True):
    """Build a two-column reward feature matrix for every student.

    Loads ``feature_values.npz`` and ``feature_labels.csv`` from ``DATA_DIR``,
    discretizes the features, converts them to trajectories and scores each
    trajectory under both reward tables.

    Args:
        DATA_DIR: Directory holding the two input files.
        pass_reward: Reward table used for the first column of ``X``.
        fail_reward: Reward table used for the second column of ``X``.
        size: Forwarded to ``World(size=...)``.
        feature_list: Feature names forwarded to ``discretized_feature``.
        show: When true, print the shapes of the pass (label 0) and fail
            (label 1) subsets.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape (number of students, 2) with each
        row ``(pass_reward total, fail_reward total)``; ``y`` is the integer
        ``label-pass-fail`` column.
    """
    # NpzFile keeps the archive open until closed; the extracted array
    # remains valid after the context manager exits.
    with np.load(DATA_DIR + "/feature_values.npz") as archive:
        data = archive['feature_values']
    label_df = pd.read_csv(DATA_DIR + "/feature_labels.csv")
    y = np.array(label_df['label-pass-fail']).astype(int)

    # Log-transform the second-to-last feature in place.
    data[:, :, -2] = np.log(data[:, :, -2])
    if show:
        # The class subsets are only needed for this report.
        print('Student pass the course:', data[y == 0].shape)
        print('Student fail the course:', data[y == 1].shape)

    filna_all_data = dealing_missing_value(data)
    discretized_all_data = discretized_feature(clusters_list=[3, 3, 4, 3, 2, 2, 3], feature=filna_all_data, feature_list=feature_list)
    student_world = World(size=size, samples_trajectory=discretized_all_data)
    trajectories_all = feature_table_to_trajectories(student_world, discretized_all_data)

    student_pass_score = reward_each_student(pass_reward, trajectories_all)
    student_fail_score = reward_each_student(fail_reward, trajectories_all)
    X = np.column_stack((student_pass_score, student_fail_score))
    return X, y