"""Download fMRI data, extract functional connectivity (FC) and run benchmarks.

The module has three layers:

* ``get_sub_HCP_*`` / ``get_ABIDE`` fetch NIfTI volumes to ``nii_addr``.
* ``get_fc_*`` parcellate each volume with an atlas and store the vectorised
  upper triangle of the ROI-by-ROI correlation matrix as ``.npz`` files.
* The remaining functions read those ``.npz`` files and report stability,
  fingerprinting and classification scores; ``downstream_all`` prints them all.
"""

import os
import shutil

import nibabel as nib
import numpy as np
import pandas as pd
from boto3.session import Session
from nilearn.datasets import fetch_abide_pcp
from nilearn.image import resample_to_img
from scipy.signal import detrend
from scipy.stats import zscore
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

_HCP_BUCKET = 'hcp-openaccess'
_HCP_PREFIX = 'HCP_1200/'
_HCP_REST_RUNS = ('REST1_LR', 'REST1_RL', 'REST2_LR', 'REST2_RL')

# Task -> sub-task (EV file stem) table. Insertion order matters: it defines
# the class indices used by cognitive_task_7way and cognitive_task_24way.
_HCP_TASKS = {
    'WM': ['0bk_body', '0bk_faces', '0bk_places', '0bk_tools',
           '2bk_body', '2bk_faces', '2bk_places', '2bk_tools'],
    'GAMBLING': ['win', 'loss'],
    'MOTOR': ['cue', 'lf', 'rf', 'lh', 'rh', 't'],
    'LANGUAGE': ['story', 'math'],
    'SOCIAL': ['mental', 'rnd'],
    'RELATIONAL': ['relation', 'match'],
    'EMOTION': ['fear', 'neut'],
}

_HCP_SUBJLIST = './docs/HCP_subjlist.txt'
_HCP_BEHAVIOR = './docs/behavior_HCP.csv'
_ABIDE_BEHAVIOR = './docs/behavior_ABIDE.csv'
_ADNI_BEHAVIOR = './docs/behavior_ADNI.csv'


# --------------------------------------------------------------------------
# Download helpers
# --------------------------------------------------------------------------

def _download_if_missing(bucket, source_addr, target_addr):
    """Fetch one S3 object unless ``target_addr`` exists; return success.

    Downloads are best-effort: any ordinary error is swallowed and reported
    as ``False``. ``KeyboardInterrupt``/``SystemExit`` are NOT swallowed, so
    a long download loop can still be interrupted.
    """
    if os.path.exists(target_addr):
        return True
    try:
        bucket.download_file(source_addr, target_addr)
    except Exception:
        return False
    return True


def get_sub_HCP_rfMRI(subject, access_key, secret_key, addr='./nii_data'):
    """Download the four resting-state runs of one HCP subject.

    Files are written to ``<addr>/HCP_rfMRI``. Runs that already exist
    locally are skipped; runs that fail to download are silently skipped.

    Args:
        subject: HCP subject identifier used in the S3 key.
        access_key: AWS access key id.
        secret_key: AWS secret access key.
        addr: Root directory for downloaded NIfTI data.
    """
    session = Session(aws_access_key_id=access_key,
                      aws_secret_access_key=secret_key)
    bucket = session.resource('s3').Bucket(_HCP_BUCKET)
    os.makedirs(os.path.join(addr, 'HCP_rfMRI'), exist_ok=True)
    for run in _HCP_REST_RUNS:
        source_addr = '{}{}/MNINonLinear/Results/rfMRI_{}/rfMRI_{}_hp2000_clean.nii.gz'.format(
            _HCP_PREFIX, subject, run, run)
        target_addr = os.path.join(
            addr, 'HCP_rfMRI/{}_rfMRI_{}_hp2000_clean.nii.gz'.format(subject, run))
        _download_if_missing(bucket, source_addr, target_addr)


def get_sub_HCP_tfMRI(subject, access_key, secret_key, addr='./nii_data'):
    """Download the LR task runs and their EV timing files for one subject.

    Files are written to ``<addr>/HCP_tfMRI``. When a task volume cannot be
    downloaded, its EV files are not attempted. Individual EV download
    failures are silently skipped.

    Args:
        subject: HCP subject identifier used in the S3 key.
        access_key: AWS access key id.
        secret_key: AWS secret access key.
        addr: Root directory for downloaded NIfTI data.
    """
    session = Session(aws_access_key_id=access_key,
                      aws_secret_access_key=secret_key)
    bucket = session.resource('s3').Bucket(_HCP_BUCKET)
    os.makedirs(os.path.join(addr, 'HCP_tfMRI'), exist_ok=True)
    for task, subtask_list in _HCP_TASKS.items():
        source_addr = '{}{}/MNINonLinear/Results/tfMRI_{}_LR/tfMRI_{}_LR.nii.gz'.format(
            _HCP_PREFIX, subject, task, task)
        target_addr = os.path.join(
            addr, 'HCP_tfMRI/{}_tfMRI_{}_LR.nii.gz'.format(subject, task))
        if not _download_if_missing(bucket, source_addr, target_addr):
            continue
        for subtask in subtask_list:
            subtask_source_addr = '{}{}/MNINonLinear/Results/tfMRI_{}_LR/EVs/{}.txt'.format(
                _HCP_PREFIX, subject, task, subtask)
            subtask_target_addr = os.path.join(
                addr, 'HCP_tfMRI/{}_tfMRI_{}_{}_ev.txt'.format(subject, task, subtask))
            _download_if_missing(bucket, subtask_source_addr, subtask_target_addr)


def get_ABIDE(addr='./nii_data'):
    """Fetch ABIDE via nilearn and keep only the volumes under ``<addr>/ABIDE``.

    After the fetch, ``README.md`` and the phenotypic CSV are deleted, the
    ``cpac/filt_noglobal`` directory is moved to ``<addr>/ABIDE`` and the rest
    of ``ABIDE_pcp`` is removed.

    Raises:
        FileNotFoundError: If a file expected from the fetch is missing.
    """
    fetch_abide_pcp(data_dir=addr, band_pass_filtering=True)
    os.remove(os.path.join(addr, 'README.md'))
    os.remove(os.path.join(addr, 'ABIDE_pcp/Phenotypic_V1_0b_preprocessed1.csv'))
    shutil.move(os.path.join(addr, 'ABIDE_pcp/cpac/filt_noglobal'),
                os.path.join(addr, 'ABIDE'))
    shutil.rmtree(os.path.join(addr, 'ABIDE_pcp'))


# --------------------------------------------------------------------------
# Functional-connectivity extraction
# --------------------------------------------------------------------------

def _roi_time_series(img, atlas_loc):
    """Return the (time, ROI) matrix of detrended, z-scored ROI mean signals.

    The atlas is resampled onto ``img`` with nearest-neighbour interpolation;
    every strictly positive label is one ROI, ordered by ascending label.
    """
    data = img.get_fdata()
    voxels = data.reshape(-1, data.shape[-1])  # (voxel, time)
    atlas = resample_to_img(nib.load(atlas_loc), img, interpolation='nearest')
    labels = atlas.get_fdata().astype(int).flatten()
    roi_ids = np.sort(np.unique(labels[labels > 0]))
    series = np.stack([voxels[labels == roi].mean(axis=0) for roi in roi_ids], axis=1)
    return zscore(detrend(series, axis=0, type='linear'), axis=0, ddof=1)


def _fc_vector(time_series):
    """Return the strict upper triangle of the ROI correlation matrix."""
    fc = np.corrcoef(time_series.T)
    return fc[np.triu_indices(fc.shape[0], k=1)]


def _save_fc(path, fc):
    """Write ``fc`` to ``path`` (as ``arr_0``) unless it contains NaN."""
    if not np.isnan(fc).any():
        np.savez_compressed(path, fc)


def get_fc_HCP_rfMRI(subject, atlas_name, atlas_loc, nii_addr='./nii_data',
                     fc_addr='./fc_data', window_len=300):
    """Compute windowed FC vectors for every resting-state run of a subject.

    Each run is cut into consecutive, non-overlapping windows of
    ``window_len`` volumes (a trailing partial window is dropped). One file
    ``<fc_addr>/HCP_rfMRI/<atlas_name>/<subject>_<run>_<i>.npz`` is written
    per window; windows whose FC contains NaN are not written.

    Args:
        subject: HCP subject identifier.
        atlas_name: Name of the output sub-directory for this atlas.
        atlas_loc: Path to the atlas label image.
        nii_addr: Root directory holding the downloaded volumes.
        fc_addr: Root directory for FC output.
        window_len: Number of volumes per window.

    Raises:
        FileNotFoundError: If a run's volume is not present locally.
    """
    os.makedirs(os.path.join(fc_addr, 'HCP_rfMRI', atlas_name), exist_ok=True)
    for run in _HCP_REST_RUNS:
        rfmri_img = nib.load(os.path.join(
            nii_addr, 'HCP_rfMRI', '{}_rfMRI_{}_hp2000_clean.nii.gz'.format(subject, run)))
        time_series = _roi_time_series(rfmri_img, atlas_loc)
        for i in range(time_series.shape[0] // window_len):
            fc = _fc_vector(time_series[i * window_len:(i + 1) * window_len])
            _save_fc(os.path.join(
                fc_addr, 'HCP_rfMRI',
                '{}/{}_{}_{}.npz'.format(atlas_name, subject, run, i)), fc)


def get_fc_HCP_tfMRI(subject, atlas_name, atlas_loc, nii_addr='./nii_data',
                     fc_addr='./fc_data', tr=0.72):
    """Compute whole-run and per-sub-task FC vectors for each task run.

    For every task, the whole-run FC is written to
    ``<subject>_<task>_LR.npz``. For every sub-task with a local EV file, the
    volumes whose time stamp lies within any ``[onset, onset + duration]``
    interval (columns 0 and 1 of the EV file, with a 1e-3 tolerance) are
    concatenated and their FC is written to
    ``<subject>_<task>_LR_<subtask>.npz``. FC vectors containing NaN are not
    written; sub-tasks with a missing or empty EV file are skipped.

    Args:
        subject: HCP subject identifier.
        atlas_name: Name of the output sub-directory for this atlas.
        atlas_loc: Path to the atlas label image.
        nii_addr: Root directory holding the downloaded volumes and EV files.
        fc_addr: Root directory for FC output.
        tr: Spacing between consecutive volumes, in the same unit as the EV
            onsets/durations (presumably seconds; 0.72 looks like the HCP
            repetition time - confirm).

    Raises:
        FileNotFoundError: If a task volume is not present locally.
    """
    os.makedirs(os.path.join(fc_addr, 'HCP_tfMRI', atlas_name), exist_ok=True)
    for task, subtask_list in _HCP_TASKS.items():
        tfmri_img = nib.load(os.path.join(
            nii_addr, 'HCP_tfMRI', '{}_tfMRI_{}_LR.nii.gz'.format(subject, task)))
        time_series = _roi_time_series(tfmri_img, atlas_loc)
        _save_fc(os.path.join(
            fc_addr, 'HCP_tfMRI',
            '{}/{}_{}_LR.npz'.format(atlas_name, subject, task)), _fc_vector(time_series))

        timepoint = np.arange(time_series.shape[0]) * tr
        for subtask in subtask_list:
            ev_path = os.path.join(
                nii_addr, 'HCP_tfMRI', '{}_tfMRI_{}_{}_ev.txt'.format(subject, task, subtask))
            if not os.path.exists(ev_path):
                continue
            events = np.loadtxt(ev_path).reshape(-1, 3)
            if events.shape[0] == 0:
                # Empty EV file: nothing to stack (np.vstack([]) would raise).
                continue
            subtask_time_series = np.vstack([
                time_series[(timepoint > onset - 1e-3)
                            & (timepoint < onset + duration + 1e-3)]
                for onset, duration in events[:, :2]
            ])
            _save_fc(os.path.join(
                fc_addr, 'HCP_tfMRI',
                '{}/{}_{}_LR_{}.npz'.format(atlas_name, subject, task, subtask)),
                _fc_vector(subtask_time_series))


def get_fc_ABIDE(atlas_name, atlas_loc, nii_addr='./nii_data', fc_addr='./fc_data'):
    """Compute one FC vector per file found in ``<nii_addr>/ABIDE``.

    The output name is the input file name minus its last 20 characters
    (presumably the ``_func_preproc.nii.gz`` suffix - confirm), plus ``.npz``.
    FC vectors containing NaN are not written.
    """
    os.makedirs(os.path.join(fc_addr, 'ABIDE', atlas_name), exist_ok=True)
    for filename in os.listdir(os.path.join(nii_addr, 'ABIDE')):
        prefix = filename[:-20]
        rfmri_img = nib.load(os.path.join(nii_addr, 'ABIDE', filename))
        fc = _fc_vector(_roi_time_series(rfmri_img, atlas_loc))
        _save_fc(os.path.join(
            fc_addr, 'ABIDE', '{}/{}.npz'.format(atlas_name, prefix)), fc)


def get_fc_ADNI(atlas_name, atlas_loc, nii_addr='./nii_data', fc_addr='./fc_data'):
    """Compute one FC vector per entry of ``<nii_addr>/ADNI``.

    Each entry is expected to be a directory containing
    ``Filtered_4DVolume.nii``; the entry name becomes the output file stem.
    FC vectors containing NaN are not written.
    """
    os.makedirs(os.path.join(fc_addr, 'ADNI', atlas_name), exist_ok=True)
    for prefix in os.listdir(os.path.join(nii_addr, 'ADNI')):
        rfmri_img = nib.load(os.path.join(nii_addr, 'ADNI', prefix, 'Filtered_4DVolume.nii'))
        fc = _fc_vector(_roi_time_series(rfmri_img, atlas_loc))
        _save_fc(os.path.join(
            fc_addr, 'ADNI', '{}/{}.npz'.format(atlas_name, prefix)), fc)


# --------------------------------------------------------------------------
# Shared evaluation helpers
# --------------------------------------------------------------------------

def _files_by_subject(addr, subjects):
    """Map each subject to the file names in ``addr`` that contain its id.

    The directory is listed once; names keep ``os.listdir`` order.
    """
    names = os.listdir(addr)
    return {sub: [name for name in names if sub in name] for sub in subjects}


def _load_fc(addr, name):
    """Load the FC vector stored as ``arr_0`` in ``<addr>/<name>``."""
    return np.load(os.path.join(addr, name))['arr_0']


def _fit_and_score(X_train, X_test, y_train, y_test):
    """Standardise, PCA-reduce to 100 components if wider, fit SVM, score.

    Scaler and PCA are fitted on the training fold only. Returns the test
    accuracy of a class-balanced linear SVC.
    """
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    if X_train.shape[1] > 100:
        pca = PCA(n_components=100, random_state=0)
        X_train = pca.fit_transform(X_train)
        X_test = pca.transform(X_test)
    clf = SVC(kernel='linear', class_weight='balanced')
    clf.fit(X_train, y_train)
    return accuracy_score(y_test, clf.predict(X_test))


def _group_level_cv(splitter, group_ids, group_y, mat_X, mat_y, mat_master):
    """Cross-validate over groups so no group appears in both train and test.

    ``splitter`` partitions ``group_ids``; every sample whose ``mat_master``
    entry is in a fold's group set goes to that side of the fold.
    """
    scores = []
    for train_idx, test_idx in splitter.split(group_ids, group_y):
        train_mask = np.isin(mat_master, group_ids[train_idx])
        test_mask = np.isin(mat_master, group_ids[test_idx])
        scores.append(_fit_and_score(mat_X[train_mask], mat_X[test_mask],
                                     mat_y[train_mask], mat_y[test_mask]))
    return np.array(scores)


def _sample_level_cv(mat_X, mat_y):
    """Run stratified 10-fold cross-validation directly over samples."""
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    scores = [
        _fit_and_score(mat_X[train_idx], mat_X[test_idx],
                       mat_y[train_idx], mat_y[test_idx])
        for train_idx, test_idx in skf.split(mat_X, mat_y)
    ]
    return np.array(scores)


def _hcp_subject_level_cv(atlas_name, fc_addr, column, encode=None):
    """Subject-stratified 10-fold CV predicting one HCP behavioural column.

    Subjects whose raw value in ``column`` is missing are dropped. ``encode``
    optionally maps the raw value to the class label; missingness is tested
    on the raw value so that encoding cannot hide it.
    """
    subjlist = np.loadtxt(_HCP_SUBJLIST, dtype=str)
    behavior = pd.read_csv(_HCP_BEHAVIOR).to_dict(orient='list')
    addr = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)
    files_of = _files_by_subject(addr, subjlist)

    sub_X, sub_y = [], []
    mat_X, mat_y, mat_master = [], [], []
    for sub in subjlist:
        raw = behavior[column][behavior['Subject'].index(int(sub))]
        if pd.isna(raw):
            continue
        target = raw if encode is None else encode(raw)
        sub_X.append(int(sub))
        sub_y.append(target)
        for name in files_of[sub]:
            mat_X.append(_load_fc(addr, name))
            mat_y.append(target)
            mat_master.append(int(sub))

    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    return _group_level_cv(skf, np.array(sub_X), np.array(sub_y),
                           np.array(mat_X), np.array(mat_y), np.array(mat_master))


# --------------------------------------------------------------------------
# Benchmarks
# --------------------------------------------------------------------------

def fc_stability(atlas_name, fc_addr='./fc_data'):
    """Return, per subject, the mean pairwise correlation of its FC vectors.

    Only subjects with more than one resting-state FC file contribute.
    """
    subjlist = np.loadtxt(_HCP_SUBJLIST, dtype=str)
    addr = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)
    files_of = _files_by_subject(addr, subjlist)
    res_list = []
    for sub in subjlist:
        names = files_of[sub]
        if len(names) > 1:
            fc_corr = np.corrcoef(np.array([_load_fc(addr, name) for name in names]))
            res_list.append(np.mean(fc_corr[np.triu_indices(fc_corr.shape[0], k=1)]))
    return np.array(res_list)


def fingerprinting(atlas_name, fc_addr='./fc_data'):
    """Return, per subject, the fraction of FC vectors matched to that subject.

    The first file of each subject (in ``os.listdir`` order, which is
    platform-dependent) is its reference. Every remaining file is assigned to
    the subject whose reference correlates best with it. Subjects with a
    single file have no queries and are omitted from the result.
    """
    addr = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)
    subjlist = np.loadtxt(_HCP_SUBJLIST, dtype=str)
    files_of = _files_by_subject(addr, subjlist)
    subjlist = [sub for sub in subjlist if files_of[sub]]
    reference = [_load_fc(addr, files_of[sub][0]) for sub in subjlist]
    res_list = []
    for sub in subjlist:
        queries = files_of[sub][1:]
        if not queries:
            continue
        count = 0
        for name in queries:
            query_fc = _load_fc(addr, name)
            temp = np.array([np.corrcoef([query_fc, ref])[0, 1] for ref in reference])
            if subjlist[np.where(temp == np.max(temp))[0][0]] == sub:
                count += 1
        res_list.append(count / len(queries))
    return np.array(res_list)


def age_group_classification(atlas_name, fc_addr='./fc_data'):
    """Return 10-fold accuracies for predicting the ``Age_Group`` column."""
    return _hcp_subject_level_cv(atlas_name, fc_addr, 'Age_Group')


def gender_classification(atlas_name, fc_addr='./fc_data'):
    """Return 10-fold accuracies for predicting ``Gender`` (1 = 'M', else 0).

    Subjects with a missing ``Gender`` value are excluded rather than being
    labelled 0.
    """
    return _hcp_subject_level_cv(atlas_name, fc_addr, 'Gender',
                                 encode=lambda raw: int(raw == 'M'))


def fluid_intelligence(atlas_name, fc_addr='./fc_data'):
    """Return 10-fold accuracies for ``CogFluidComp_AgeAdj_Group``."""
    return _hcp_subject_level_cv(atlas_name, fc_addr, 'CogFluidComp_AgeAdj_Group')


def crystallized_intelligence(atlas_name, fc_addr='./fc_data'):
    """Return 10-fold accuracies for ``CogCrystalComp_AgeAdj_Group``."""
    return _hcp_subject_level_cv(atlas_name, fc_addr, 'CogCrystalComp_AgeAdj_Group')


def general_intelligence(atlas_name, fc_addr='./fc_data'):
    """Return 10-fold accuracies for ``CogTotalComp_AgeAdj_Group``."""
    return _hcp_subject_level_cv(atlas_name, fc_addr, 'CogTotalComp_AgeAdj_Group')


def cognitive_task_7way(atlas_name, fc_addr='./fc_data'):
    """Return 10-fold accuracies for classifying the task from whole-run FC.

    Folds are formed over subjects; the class label is the task's position
    in the task table.
    """
    subjlist = np.loadtxt(_HCP_SUBJLIST, dtype=str)
    addr = os.path.join(fc_addr, 'HCP_tfMRI', atlas_name)
    mat_X, mat_y, mat_master = [], [], []
    for sub in subjlist:
        for label, task in enumerate(_HCP_TASKS):
            path = os.path.join(addr, '{}_{}_LR.npz'.format(sub, task))
            if not os.path.exists(path):
                continue
            mat_X.append(np.load(path)['arr_0'])
            mat_y.append(label)
            mat_master.append(sub)
    kf = KFold(n_splits=10, shuffle=True, random_state=0)
    return _group_level_cv(kf, subjlist, None,
                           np.array(mat_X), np.array(mat_y), np.array(mat_master))


def cognitive_task_24way(atlas_name, fc_addr='./fc_data'):
    """Return 10-fold accuracies for classifying the sub-task from its FC.

    Folds are formed over subjects; the class label is the sub-task's
    position in the flattened task table.
    """
    subjlist = np.loadtxt(_HCP_SUBJLIST, dtype=str)
    addr = os.path.join(fc_addr, 'HCP_tfMRI', atlas_name)
    conditions = [(task, subtask)
                  for task, subtasks in _HCP_TASKS.items()
                  for subtask in subtasks]
    mat_X, mat_y, mat_master = [], [], []
    for sub in subjlist:
        for label, (task, subtask) in enumerate(conditions):
            path = os.path.join(addr, '{}_{}_LR_{}.npz'.format(sub, task, subtask))
            if not os.path.exists(path):
                continue
            mat_X.append(np.load(path)['arr_0'])
            mat_y.append(label)
            mat_master.append(sub)
    kf = KFold(n_splits=10, shuffle=True, random_state=0)
    return _group_level_cv(kf, subjlist, None,
                           np.array(mat_X), np.array(mat_y), np.array(mat_master))


def autism_diagnosis(atlas_name, fc_addr='./fc_data'):
    """Return stratified 10-fold accuracies for predicting ABIDE ``DX_GROUP``.

    Files are matched to rows by ``FILE_ID`` (file name without ``.npz``);
    rows with a missing ``DX_GROUP`` are skipped.
    """
    behavior = pd.read_csv(_ABIDE_BEHAVIOR).to_dict(orient='list')
    addr = os.path.join(fc_addr, 'ABIDE', atlas_name)
    mat_X, mat_y = [], []
    for file in os.listdir(addr):
        target = behavior['DX_GROUP'][behavior['FILE_ID'].index(file[:-4])]
        if pd.isna(target):
            continue
        mat_X.append(_load_fc(addr, file))
        mat_y.append(target)
    return _sample_level_cv(np.array(mat_X), np.array(mat_y))


def autism_cross_site(atlas_name, fc_addr='./fc_data'):
    """Return leave-one-site-out accuracies for predicting ``DX_GROUP``.

    One accuracy per distinct ``SITE_ID`` (in sorted order): the model is
    trained on all other sites and tested on the held-out one.
    """
    behavior = pd.read_csv(_ABIDE_BEHAVIOR).to_dict(orient='list')
    addr = os.path.join(fc_addr, 'ABIDE', atlas_name)
    mat_X, mat_y, mat_master = [], [], []
    for file in os.listdir(addr):
        row = behavior['FILE_ID'].index(file[:-4])
        target = behavior['DX_GROUP'][row]
        site = behavior['SITE_ID'][row]
        if pd.isna(target):
            continue
        mat_X.append(_load_fc(addr, file))
        mat_y.append(target)
        mat_master.append(site)
    mat_X = np.array(mat_X)
    mat_y = np.array(mat_y)
    mat_master = np.array(mat_master, dtype=str)

    acc_list = []
    for holdout_site in np.unique(mat_master):
        test_mask = mat_master == holdout_site
        train_mask = ~test_mask
        acc_list.append(_fit_and_score(mat_X[train_mask], mat_X[test_mask],
                                       mat_y[train_mask], mat_y[test_mask]))
    return np.array(acc_list)


def AD_diagnosis(atlas_name, fc_addr='./fc_data'):
    """Return stratified 10-fold accuracies for the ADNI research group.

    Labels: ``'CN'`` -> 0, ``'MCI'`` -> 1, anything else -> 2.
    """
    behavior = pd.read_csv(_ADNI_BEHAVIOR).to_dict(orient='list')
    addr = os.path.join(fc_addr, 'ADNI', atlas_name)
    group_to_idx = {'CN': 0, 'MCI': 1}
    mat_X, mat_y = [], []
    for file in os.listdir(addr):
        target = behavior['Research Group'][behavior['Subject ID'].index(file[:-4])]
        mat_X.append(_load_fc(addr, file))
        mat_y.append(group_to_idx.get(target, 2))
    return _sample_level_cv(np.array(mat_X), np.array(mat_y))


def downstream_all(atlas_name, fc_addr='./fc_data'):
    """Run every benchmark for ``atlas_name`` and print mean±std of each."""
    report = (
        ('Gender classification', gender_classification),
        ('Fluid intelligence', fluid_intelligence),
        ('Cognitive task (7-way)', cognitive_task_7way),
        ('Cognitive task (24-way)', cognitive_task_24way),
        ('Autism diagnosis', autism_diagnosis),
        ('AD diagnosis', AD_diagnosis),
        ('FC stability', fc_stability),
        ('Fingerprinting', fingerprinting),
        ('Age group classification', age_group_classification),
        ('Crystallized intelligence', crystallized_intelligence),
        ('General intelligence', general_intelligence),
        ('Autism cross-site', autism_cross_site),
    )
    print('--- {} downstream report ---'.format(atlas_name))
    for label, benchmark in report:
        res = benchmark(atlas_name, fc_addr)
        print('{}: {:.3f}±{:.3f}'.format(label, np.mean(res), np.std(res)))