import os
import pickle
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.model_selection import (
    train_test_split,
    StratifiedKFold,
    cross_validate,
)
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix,
)
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import (
    RandomForestClassifier,
    VotingClassifier,
    BaggingClassifier,
    StackingClassifier,
)
import xgboost as xgb
import lightgbm as lgb

# Silence all library warnings and pin NumPy's global RNG for repeatability.
warnings.filterwarnings('ignore')
np.random.seed(42)

# ---------------------------------------------------------------------------
# Paths and experiment constants
# ---------------------------------------------------------------------------
# The data directory is resolved relative to the parent of the current
# working directory, so the script must be launched from inside the repo.
REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
DATA_DIR = os.path.join(REPO_ROOT, 'Datasets_all')
OUT_DIR = Path('models')
OUT_DIR.mkdir(exist_ok=True)

RANDOM_STATE = 42
N_SPLITS = 5
CHAMPION_F1 = 0.6110  # Score from A4

# ---------------------------------------------------------------------------
# Load the two source tables
# ---------------------------------------------------------------------------
movement_features_df = pd.read_csv(os.path.join(DATA_DIR, 'aimoscores.csv'))
weaklink_scores_df = pd.read_csv(
    os.path.join(DATA_DIR, 'scores_and_weaklink.csv')
)
print('Movement features shape:', movement_features_df.shape)
print('Weak link scores shape:', weaklink_scores_df.shape)

# Columns No_1_NASM_Deviation ... No_5_NASM_Deviation are dropped as duplicates.
DUPLICATE_NASM_COLS = [f'No_{i}_NASM_Deviation' for i in range(1, 6)]
movement_features_df = movement_features_df.drop(columns=DUPLICATE_NASM_COLS)
print('Shape after duplicate removal:', movement_features_df.shape)

# ---------------------------------------------------------------------------
# Derive the classification target
# ---------------------------------------------------------------------------
weaklink_categories = [
    'ExcessiveForwardLean',
    'ForwardHead',
    'LeftArmFallForward',
    'LeftAsymmetricalWeightShift',
    'LeftHeelRises',
    'LeftKneeMovesInward',
    'LeftKneeMovesOutward',
    'LeftShoulderElevation',
    'RightArmFallForward',
    'RightAsymmetricalWeightShift',
    'RightHeelRises',
    'RightKneeMovesInward',
    'RightKneeMovesOutward',
    'RightShoulderElevation',
]
# The target label is the name of the category column holding the row's
# largest value.
weaklink_scores_df['WeakestLink'] = (
    weaklink_scores_df[weaklink_categories].idxmax(axis=1)
)
print('Weakest Link class distribution:')
print(weaklink_scores_df['WeakestLink'].value_counts())

# ---------------------------------------------------------------------------
# Merge features with the target on ID (rows present in both tables only)
# ---------------------------------------------------------------------------
target_df = weaklink_scores_df[['ID', 'WeakestLink']].copy()
merged_df = movement_features_df.merge(target_df, on='ID', how='inner')
print('Merged dataset shape:', merged_df.shape)

# Every remaining column except the identifier, the label and EstimatedScore
# is used as a model input.
EXCLUDE_COLS = ['ID', 'WeakestLink', 'EstimatedScore']
feature_columns = [
    col for col in merged_df.columns if col not in EXCLUDE_COLS
]
X = merged_df[feature_columns].values
y = merged_df['WeakestLink'].values
print(f'Feature matrix shape : {X.shape}')
print(f'Number of features : {len(feature_columns)}')
print(f'Number of classes : {len(np.unique(y))}')

# ---------------------------------------------------------------------------
# Hold-out split and scaling
# ---------------------------------------------------------------------------
# A stratified 20% test set is held out; the scaler is fitted on the
# training portion only and then applied to both portions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
    stratify=y,
)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
print(f'Training samples : {X_train.shape[0]}')
print(f'Test samples : {X_test.shape[0]}')

cv_strategy = StratifiedKFold(
    n_splits=N_SPLITS,
    shuffle=True,
    random_state=RANDOM_STATE,
)


def evaluate_cv(model, X, y, cv, name='Model'):
    """Cross-validate *model* and summarise its weighted metrics.

    Args:
        model: Estimator passed to ``cross_validate``.
        X: Feature matrix.
        y: Target labels.
        cv: Cross-validation splitter (or anything ``cross_validate``
            accepts for ``cv``).
        name: Display name stored under the ``'Model'`` key.

    Returns:
        A dict with the model name, mean/std of accuracy and weighted F1,
        mean weighted precision and recall, and the raw per-fold F1 array
        under ``'_f1_scores'``.
    """
    metric_spec = {
        'accuracy': 'accuracy',
        'f1': 'f1_weighted',
        'precision': 'precision_weighted',
        'recall': 'recall_weighted',
    }
    fold_scores = cross_validate(model, X, y, cv=cv, scoring=metric_spec)

    accuracy_folds = fold_scores['test_accuracy']
    f1_folds = fold_scores['test_f1']

    summary = {'Model': name}
    summary['Accuracy_mean'] = accuracy_folds.mean()
    summary['Accuracy_std'] = accuracy_folds.std()
    summary['F1_mean'] = f1_folds.mean()
    summary['F1_std'] = f1_folds.std()
    summary['Precision_mean'] = fold_scores['test_precision'].mean()
    summary['Recall_mean'] = fold_scores['test_recall'].mean()
    # Per-fold F1 is kept so models can be compared fold by fold later.
    summary['_f1_scores'] = f1_folds
    return summary


# Random Forest configured as the baseline ("A4 champion") model.
rf_champion = RandomForestClassifier(
    n_estimators=200,
    max_depth=15,
    min_samples_split=5,
    min_samples_leaf=2,
    class_weight='balanced',
    random_state=RANDOM_STATE,
    n_jobs=-1,
)
# ---------------------------------------------------------------------------
# Baseline: A4 champion (Random Forest)
# ---------------------------------------------------------------------------
champ_cv = evaluate_cv(
    rf_champion,
    X_train_scaled,
    y_train,
    cv_strategy,
    name='A4 Champion – Random Forest',
)
# cross_validate works on clones, so the baseline itself is fitted here once
# on the full training split; that fitted model is reused for test scoring.
rf_champion.fit(X_train_scaled, y_train)
champ_test_f1 = f1_score(
    y_test, rf_champion.predict(X_test_scaled), average='weighted'
)
print('A4 CHAMPION (Random Forest)')
print(f'CV F1: {champ_cv["F1_mean"]:.4f} +/- {champ_cv["F1_std"]:.4f}')
print(f'Test F1: {champ_test_f1:.4f}')

# ---------------------------------------------------------------------------
# Candidate: soft-voting ensemble
# ---------------------------------------------------------------------------
soft_voting = VotingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(
            n_estimators=200,
            max_depth=15,
            min_samples_split=5,
            min_samples_leaf=2,
            class_weight='balanced_subsample',
            random_state=RANDOM_STATE,
            n_jobs=-1,
        )),
        ('lr', LogisticRegression(
            max_iter=1000,
            class_weight='balanced',
            random_state=RANDOM_STATE,
        )),
        # NOTE: XGBClassifier has no `class_weight` parameter. The value that
        # used to be passed here was ignored by XGBoost (the resulting
        # "parameter not used" warning is hidden by the global warnings
        # filter), so it is dropped. Class balancing for XGBoost would have
        # to go through `sample_weight` at fit time instead.
        ('xgb', xgb.XGBClassifier(
            n_estimators=200,
            max_depth=6,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=RANDOM_STATE,
            n_jobs=-1,
        )),
        ('lgb', lgb.LGBMClassifier(
            n_estimators=200,
            learning_rate=0.1,
            class_weight='balanced',
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=RANDOM_STATE,
            n_jobs=-1,
            verbosity=-1,
        )),
        ('knn', KNeighborsClassifier(n_neighbors=7)),
        ('lda', LinearDiscriminantAnalysis()),
    ],
    voting='soft',
    n_jobs=-1,
)
sv_cv = evaluate_cv(
    soft_voting, X_train_scaled, y_train, cv_strategy, name='Soft Voting'
)
print(f'Soft Voting CV F1: {sv_cv["F1_mean"]:.4f} +/- {sv_cv["F1_std"]:.4f}')

# ---------------------------------------------------------------------------
# Cross-validation summary (best mean F1 first)
# ---------------------------------------------------------------------------
all_results = [champ_cv, sv_cv]
results_df = (
    pd.DataFrame(
        # The per-fold score arrays are not tabular; keep them out of the frame.
        [{k: v for k, v in r.items() if k != '_f1_scores'} for r in all_results]
    )
    .sort_values('F1_mean', ascending=False)
    .reset_index(drop=True)
)
print('5-FOLD CROSS-VALIDATION SUMMARY')
print(
    results_df[
        ['Model', 'F1_mean', 'F1_std', 'Accuracy_mean',
         'Precision_mean', 'Recall_mean']
    ].to_string(index=False)
)


# Statistical Significance Test (t-test)
def corrected_resampled_ttest(scores_a, scores_b, n_train, n_test):
    """Paired t-test on per-fold scores with a resampling variance correction.

    The sample variance of the fold-wise differences is scaled by
    ``1/k + n_test/n_train`` (instead of the plain ``1/k``) before forming
    the t statistic, which compensates for the overlap between training
    sets of different folds.

    Args:
        scores_a: Per-fold scores of the first model (array-like, length k).
        scores_b: Per-fold scores of the second model (same length).
        n_train: Number of training samples per fold.
        n_test: Number of test samples per fold.

    Returns:
        ``(t_stat, p_value)`` as Python floats; the p-value is two-sided
        with ``k - 1`` degrees of freedom. With fewer than two folds the
        test is undefined and ``(nan, nan)`` is returned. When all fold
        differences are identical the variance is zero: ``(0.0, 1.0)`` is
        returned if the models tie exactly, otherwise ``(+/-inf, 0.0)``.
    """
    scores_a = np.asarray(scores_a, dtype=float)
    scores_b = np.asarray(scores_b, dtype=float)
    k = len(scores_a)
    if k < 2:
        return float('nan'), float('nan')

    diff = scores_a - scores_b
    d_bar = diff.mean()
    s_sq = diff.var(ddof=1)
    var_corr = (1 / k + n_test / n_train) * s_sq

    if var_corr == 0:
        # Avoid 0/0 -> NaN: identical per-fold scores mean "no difference".
        if d_bar == 0:
            return 0.0, 1.0
        return float(np.copysign(np.inf, d_bar)), 0.0

    t_stat = d_bar / np.sqrt(var_corr)
    # sf() keeps precision in the far tail where 1 - cdf() rounds to zero.
    p_value = 2 * stats.t.sf(abs(t_stat), df=k - 1)
    return float(t_stat), float(p_value)


def _weighted_test_metrics(y_true, y_pred):
    """Return (weighted F1, accuracy, weighted precision, weighted recall)."""
    return (
        f1_score(y_true, y_pred, average='weighted'),
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average='weighted', zero_division=0),
        recall_score(y_true, y_pred, average='weighted', zero_division=0),
    )


# Approximate per-fold sizes used by the variance correction.
n_total = len(X_train_scaled)
n_test_fold = n_total // N_SPLITS
n_train_fold = n_total - n_test_fold

result_map = {r['Model']: r['_f1_scores'] for r in all_results}
champ_scores = result_map['A4 Champion – Random Forest']

print('STATISTICAL SIGNIFICANCE TESTS vs A4 Champion')
for r in all_results:
    if 'Champion' in r['Model']:
        continue
    t, p = corrected_resampled_ttest(
        r['_f1_scores'], champ_scores, n_train_fold, n_test_fold
    )
    print(f' {r["Model"]:<35} t={t:+.3f} p={p:.4f}')

# ---------------------------------------------------------------------------
# Test-set evaluation
# ---------------------------------------------------------------------------
# The winner is chosen on cross-validated F1 only; the hold-out set is
# scored afterwards for reporting and never drives the selection.
model_objects = {
    'Soft Voting': soft_voting,
    'A4 Champion – Random Forest': rf_champion,
}
best_name = results_df.iloc[0]['Model']
best_model = model_objects[best_name]
print(f'CHAMPION ENSEMBLE: {best_name}')
print(
    f'CV F1 : {results_df.iloc[0]["F1_mean"]:.4f} '
    f'+/- {results_df.iloc[0]["F1_std"]:.4f}'
)

# Fit every model exactly once on the training split and cache its test
# predictions. rf_champion was already fitted above on the same data with a
# fixed random_state, so refitting it would only repeat identical work.
test_predictions = {}
for name, model in model_objects.items():
    if model is not rf_champion:
        model.fit(X_train_scaled, y_train)
    test_predictions[name] = model.predict(X_test_scaled)

y_pred_best = test_predictions[best_name]
test_f1, test_acc, test_prec, test_rec = _weighted_test_metrics(
    y_test, y_pred_best
)
# Relative change (in percent) against the recorded A4 score.
improvement = (test_f1 - CHAMPION_F1) / CHAMPION_F1 * 100

print('\n TEST SET RESULTS')
print(f'F1-Score (weighted) : {test_f1:.4f}')
print(f'Accuracy : {test_acc:.4f}')
print(f'Precision : {test_prec:.4f}')
print(f'Recall : {test_rec:.4f}')
print(f'\n A4 original champion F1 : {CHAMPION_F1:.4f}')

test_rows = []
for name, preds in test_predictions.items():
    row_f1, row_acc, row_prec, row_rec = _weighted_test_metrics(y_test, preds)
    test_rows.append({
        'Model': name,
        'Test_F1': row_f1,
        'Test_Acc': row_acc,
        'Test_Prec': row_prec,
        'Test_Recall': row_rec,
    })
# ---------------------------------------------------------------------------
# Test-set comparison table and per-class report for the selected model
# ---------------------------------------------------------------------------
test_results_df = pd.DataFrame(test_rows).sort_values(
    by='Test_F1', ascending=False
)
print('TEST SET COMPARISON – ALL MODELS')
print(test_results_df.to_string(index=False))

print(f'CLASSIFICATION REPORT: {best_name}')
print(classification_report(y_test, y_pred_best, zero_division=0))

# ---------------------------------------------------------------------------
# Save model
# ---------------------------------------------------------------------------
# Row 0 of results_df is the model with the highest mean CV F1.
best_cv_row = results_df.iloc[0]

# Metrics are cast to plain floats so the pickle holds no NumPy scalars.
cv_metrics = {
    key: float(best_cv_row[column])
    for key, column in (
        ('f1_mean', 'F1_mean'),
        ('f1_std', 'F1_std'),
        ('accuracy_mean', 'Accuracy_mean'),
    )
}
test_metrics = dict(
    zip(
        ('f1', 'accuracy', 'precision', 'recall'),
        map(float, (test_f1, test_acc, test_prec, test_rec)),
    )
)

# Everything needed to reuse the model: the estimator, the fitted scaler,
# the feature column order, and the metrics recorded for this run.
artifact = {
    'model': best_model,
    'model_name': best_name,
    'scaler': scaler,
    'feature_columns': feature_columns,
    'cv_metrics': cv_metrics,
    'test_metrics': test_metrics,
    'a4_champion_f1': CHAMPION_F1,
    'improvement_pct': float(improvement),
}

out_path = OUT_DIR / 'ensemble_classification_champion.pkl'
with out_path.open('wb') as f:
    pickle.dump(artifact, f)
print(f'Saved: {out_path}')