DCA / AtlaScore /downstream /downstream.py
testtest123's picture
Sync from GitHub ncclab-sustech/DCA main
35a599b
Raw
History Blame Contribute Delete
34.5 kB
import os
from boto3.session import Session
import nibabel as nib
from nilearn.image import resample_to_img
from nilearn.datasets import fetch_abide_pcp
import numpy as np
import pandas as pd
from scipy.signal import detrend
from scipy.stats import zscore
import shutil
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
def get_sub_HCP_rfMRI(subject, access_key, secret_key, addr = './nii_data'):
    """Download the cleaned resting-state runs of one HCP subject from S3.

    Files are fetched from the ``hcp-openaccess`` bucket (prefix
    ``HCP_1200/``) into ``<addr>/HCP_rfMRI``. A run that already exists
    locally is not downloaded again, and a run whose download fails is
    skipped so the remaining runs are still attempted (best effort).

    Args:
        subject: Subject identifier used in the S3 key and the local file name.
        access_key: AWS access key id passed to the boto3 session.
        secret_key: AWS secret access key passed to the boto3 session.
        addr: Root directory for downloaded NIfTI data.
    """
    bucket_name = 'hcp-openaccess'
    prefix = 'HCP_1200/'
    session = Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key)
    bucket = session.resource('s3').Bucket(bucket_name)
    os.makedirs(os.path.join(addr, 'HCP_rfMRI'), exist_ok=True)
    for run in ['REST1_LR', 'REST1_RL', 'REST2_LR', 'REST2_RL']:
        source_addr = '{}{}/MNINonLinear/Results/rfMRI_{}/rfMRI_{}_hp2000_clean.nii.gz'.format(prefix, subject, run, run)
        target_addr = os.path.join(addr, 'HCP_rfMRI/{}_rfMRI_{}_hp2000_clean.nii.gz'.format(subject, run))
        if os.path.exists(target_addr):
            continue
        try:
            bucket.download_file(source_addr, target_addr)
        except Exception:
            # Best effort: skip a run that cannot be fetched. `Exception`
            # (not a bare except) lets Ctrl-C / SystemExit still propagate.
            continue
def get_sub_HCP_tfMRI(subject, access_key, secret_key, addr = './nii_data'):
    """Download the LR task-fMRI runs and their EV files for one HCP subject.

    For each of the seven tasks the run ``tfMRI_<TASK>_LR.nii.gz`` and the
    per-condition ``EVs/<condition>.txt`` files are fetched from the
    ``hcp-openaccess`` bucket into ``<addr>/HCP_tfMRI``. Existing local
    files are not downloaded again. If a run cannot be downloaded the whole
    task (including its EV files) is skipped; if a single EV file cannot be
    downloaded only that file is skipped.

    Args:
        subject: Subject identifier used in the S3 keys and local file names.
        access_key: AWS access key id passed to the boto3 session.
        secret_key: AWS secret access key passed to the boto3 session.
        addr: Root directory for downloaded NIfTI data.
    """
    bucket_name = 'hcp-openaccess'
    prefix = 'HCP_1200/'
    # EV (condition) files fetched for each task.
    subtasks_by_task = {
        'WM': ['0bk_body', '0bk_faces', '0bk_places', '0bk_tools', '2bk_body', '2bk_faces', '2bk_places', '2bk_tools'],
        'GAMBLING': ['win', 'loss'],
        'MOTOR': ['cue', 'lf', 'rf', 'lh', 'rh', 't'],
        'LANGUAGE': ['story', 'math'],
        'SOCIAL': ['mental', 'rnd'],
        'RELATIONAL': ['relation', 'match'],
        'EMOTION': ['fear', 'neut'],
    }
    session = Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key)
    bucket = session.resource('s3').Bucket(bucket_name)
    os.makedirs(os.path.join(addr, 'HCP_tfMRI'), exist_ok=True)
    for task, subtask_list in subtasks_by_task.items():
        source_addr = '{}{}/MNINonLinear/Results/tfMRI_{}_LR/tfMRI_{}_LR.nii.gz'.format(prefix, subject, task, task)
        target_addr = os.path.join(addr, 'HCP_tfMRI/{}_tfMRI_{}_LR.nii.gz'.format(subject, task))
        if not os.path.exists(target_addr):
            try:
                bucket.download_file(source_addr, target_addr)
            except Exception:
                # No run available: its EV files are not fetched either.
                # `Exception` keeps Ctrl-C / SystemExit propagating.
                continue
        for subtask in subtask_list:
            subtask_source_addr = '{}{}/MNINonLinear/Results/tfMRI_{}_LR/EVs/{}.txt'.format(prefix, subject, task, subtask)
            subtask_target_addr = os.path.join(addr, 'HCP_tfMRI/{}_tfMRI_{}_{}_ev.txt'.format(subject, task, subtask))
            if os.path.exists(subtask_target_addr):
                continue
            try:
                bucket.download_file(subtask_source_addr, subtask_target_addr)
            except Exception:
                # Best effort: a missing EV file only drops that condition.
                continue
def get_ABIDE(addr = './nii_data'):
    """Fetch the ABIDE PCP data and flatten it into ``<addr>/ABIDE``.

    Calls nilearn's ``fetch_abide_pcp`` with band-pass filtering enabled,
    deletes the README and phenotypic CSV left under ``addr``, moves the
    ``ABIDE_pcp/cpac/filt_noglobal`` folder to ``<addr>/ABIDE`` and finally
    removes the now-unneeded ``ABIDE_pcp`` tree.

    Args:
        addr: Root directory for downloaded NIfTI data.
    """
    fetch_abide_pcp(data_dir=addr, band_pass_filtering=True)

    # Files that came with the download and are not kept.
    for leftover in ('README.md', 'ABIDE_pcp/Phenotypic_V1_0b_preprocessed1.csv'):
        os.remove(os.path.join(addr, leftover))

    downloaded_dir = os.path.join(addr, 'ABIDE_pcp/cpac/filt_noglobal')
    final_dir = os.path.join(addr, 'ABIDE')
    shutil.move(downloaded_dir, final_dir)
    shutil.rmtree(os.path.join(addr, 'ABIDE_pcp'))
def get_fc_HCP_rfMRI(subject, atlas_name, atlas_loc, nii_addr = './nii_data', fc_addr = './fc_data'):
    """Compute windowed functional-connectivity vectors for one HCP subject.

    For every resting-state run found under ``<nii_addr>/HCP_rfMRI`` the
    atlas is resampled (nearest neighbour) onto the run, the mean signal of
    each positive atlas label is extracted, linearly detrended and z-scored
    along the last image axis, and the result is cut into consecutive
    non-overlapping windows of 300 samples. The upper triangle (diagonal
    excluded) of each window's correlation matrix is saved to
    ``<fc_addr>/HCP_rfMRI/<atlas_name>/<subject>_<run>_<i>.npz`` unless it
    contains NaN.

    Runs whose NIfTI file is absent are skipped: the downloader is best
    effort, so a subject may legitimately lack some runs.

    Args:
        subject: Subject identifier used in the file names.
        atlas_name: Name of the atlas; used as the output sub-directory.
        atlas_loc: Path of the atlas label image.
        nii_addr: Root directory of the NIfTI data.
        fc_addr: Root directory for the saved FC vectors.
    """
    window = 300  # samples per FC estimate
    os.makedirs(os.path.join(fc_addr, 'HCP_rfMRI', atlas_name), exist_ok=True)
    # The atlas file is the same for every run; only the resampling target changes.
    atlas_img = nib.load(atlas_loc)
    for run in ['REST1_LR', 'REST1_RL', 'REST2_LR', 'REST2_RL']:
        nii_path = os.path.join(nii_addr, 'HCP_rfMRI', '{}_rfMRI_{}_hp2000_clean.nii.gz'.format(subject, run))
        if not os.path.exists(nii_path):
            continue
        rfmri_img = nib.load(nii_path)
        rfmri_data = rfmri_img.get_fdata()
        # Flatten the spatial axes: one row per voxel, one column per sample.
        X = rfmri_data.reshape(-1, rfmri_data.shape[-1])
        atlas = resample_to_img(atlas_img, rfmri_img, interpolation='nearest')
        labels = atlas.get_fdata().astype(int).flatten()
        # np.unique already returns the labels in ascending order.
        roi_ids = np.unique(labels[labels > 0])
        time_series = np.stack([X[labels == roi, :].mean(axis=0) for roi in roi_ids], axis=1)
        time_series = zscore(detrend(time_series, axis=0, type='linear'), axis=0, ddof=1)
        for i in range(X.shape[1] // window):
            fc = np.corrcoef(time_series[i * window:(i + 1) * window].T)
            fc = fc[np.triu_indices(fc.shape[0], k=1)]
            # A constant ROI signal yields NaN correlations; such windows are dropped.
            if not np.isnan(fc).any():
                np.savez_compressed(os.path.join(fc_addr, 'HCP_rfMRI', '{}/{}_{}_{}.npz'.format(atlas_name, subject, run, i)), fc)
def get_fc_HCP_tfMRI(subject, atlas_name, atlas_loc, nii_addr = './nii_data', fc_addr = './fc_data'):
    """Compute task-level and condition-level FC vectors for one HCP subject.

    For every task run found under ``<nii_addr>/HCP_tfMRI`` the atlas is
    resampled (nearest neighbour) onto the run and the mean signal of each
    positive atlas label is extracted, linearly detrended and z-scored.
    Two kinds of output are written to ``<fc_addr>/HCP_tfMRI/<atlas_name>``
    (each only if it is NaN-free):

    * ``<subject>_<task>_LR.npz`` - upper triangle of the correlation
      matrix over the whole run;
    * ``<subject>_<task>_LR_<condition>.npz`` - the same, restricted to the
      samples falling inside the intervals listed in the condition's EV
      file (column 0 is used as interval start, column 1 as its length).

    Tasks whose NIfTI file is absent and conditions whose EV file is absent
    or contains no rows are skipped.

    Args:
        subject: Subject identifier used in the file names.
        atlas_name: Name of the atlas; used as the output sub-directory.
        atlas_loc: Path of the atlas label image.
        nii_addr: Root directory of the NIfTI data.
        fc_addr: Root directory for the saved FC vectors.
    """
    subtasks_by_task = {
        'WM': ['0bk_body', '0bk_faces', '0bk_places', '0bk_tools', '2bk_body', '2bk_faces', '2bk_places', '2bk_tools'],
        'GAMBLING': ['win', 'loss'],
        'MOTOR': ['cue', 'lf', 'rf', 'lh', 'rh', 't'],
        'LANGUAGE': ['story', 'math'],
        'SOCIAL': ['mental', 'rnd'],
        'RELATIONAL': ['relation', 'match'],
        'EMOTION': ['fear', 'neut'],
    }
    # Spacing between samples, in the EV files' time unit.
    # NOTE(review): presumably the 0.72 s HCP repetition time - confirm.
    sample_spacing = 0.72
    os.makedirs(os.path.join(fc_addr, 'HCP_tfMRI', atlas_name), exist_ok=True)
    # The atlas file is the same for every task; only the resampling target changes.
    atlas_img = nib.load(atlas_loc)
    for task, subtask_list in subtasks_by_task.items():
        nii_path = os.path.join(nii_addr, 'HCP_tfMRI', '{}_tfMRI_{}_LR.nii.gz'.format(subject, task))
        if not os.path.exists(nii_path):
            # The downloader is best effort, so a task may be missing.
            continue
        tfmri_img = nib.load(nii_path)
        tfmri_data = tfmri_img.get_fdata()
        # Flatten the spatial axes: one row per voxel, one column per sample.
        X = tfmri_data.reshape(-1, tfmri_data.shape[-1])
        atlas = resample_to_img(atlas_img, tfmri_img, interpolation='nearest')
        labels = atlas.get_fdata().astype(int).flatten()
        roi_ids = np.unique(labels[labels > 0])
        time_series = np.stack([X[labels == roi, :].mean(axis=0) for roi in roi_ids], axis=1)
        time_series = zscore(detrend(time_series, axis=0, type='linear'), axis=0, ddof=1)

        # Whole-run connectivity.
        fc = np.corrcoef(time_series.T)
        fc = fc[np.triu_indices(fc.shape[0], k=1)]
        if not np.isnan(fc).any():
            np.savez_compressed(os.path.join(fc_addr, 'HCP_tfMRI', '{}/{}_{}_LR.npz'.format(atlas_name, subject, task)), fc)

        # Per-condition connectivity.
        timepoint = np.arange(X.shape[-1]) * sample_spacing
        for subtask in subtask_list:
            ev_path = os.path.join(nii_addr, 'HCP_tfMRI', '{}_tfMRI_{}_{}_ev.txt'.format(subject, task, subtask))
            if not os.path.exists(ev_path):
                continue
            task_timepoint = np.loadtxt(ev_path).reshape(-1, 3)
            if task_timepoint.shape[0] == 0:
                # No intervals listed: np.vstack would raise on an empty list.
                continue
            # 1e-3 tolerance keeps samples lying exactly on an interval edge.
            segments = [
                time_series[(timepoint > onset - 1e-3) & (timepoint < onset + duration + 1e-3)]
                for onset, duration, _ in task_timepoint
            ]
            subtask_time_series = np.vstack(segments)
            fc = np.corrcoef(subtask_time_series.T)
            fc = fc[np.triu_indices(fc.shape[0], k=1)]
            if not np.isnan(fc).any():
                np.savez_compressed(os.path.join(fc_addr, 'HCP_tfMRI', '{}/{}_{}_LR_{}.npz'.format(atlas_name, subject, task, subtask)), fc)
def get_fc_ABIDE(atlas_name, atlas_loc, nii_addr = './nii_data', fc_addr = './fc_data'):
    """Compute one FC vector per file in ``<nii_addr>/ABIDE``.

    Each image is parcellated with the atlas (resampled onto the image with
    nearest-neighbour interpolation), the per-label mean signals are
    linearly detrended and z-scored, and the upper triangle (diagonal
    excluded) of their correlation matrix is saved to
    ``<fc_addr>/ABIDE/<atlas_name>/<prefix>.npz`` unless it contains NaN.

    Args:
        atlas_name: Name of the atlas; used as the output sub-directory.
        atlas_loc: Path of the atlas label image.
        nii_addr: Root directory of the NIfTI data.
        fc_addr: Root directory for the saved FC vectors.
    """
    os.makedirs(os.path.join(fc_addr, 'ABIDE', atlas_name), exist_ok=True)
    source_dir = os.path.join(nii_addr, 'ABIDE')
    for filename in os.listdir(source_dir):
        # Output name is the file name minus its last 20 characters
        # (presumably the `_func_preproc.nii.gz` suffix - TODO confirm).
        prefix = filename[:-20]
        rfmri_img = nib.load(os.path.join(source_dir, filename))
        volume = rfmri_img.get_fdata()
        # One row per voxel, one column per sample of the last image axis.
        X = volume.reshape(-1, volume.shape[-1])
        atlas = resample_to_img(nib.load(atlas_loc), rfmri_img, interpolation='nearest')
        labels = atlas.get_fdata().astype(int).flatten()
        roi_ids = np.sort(np.unique(labels[labels > 0]))
        roi_means = [X[labels == roi, :].mean(axis=0) for roi in roi_ids]
        time_series = np.stack(roi_means, axis=1)
        detrended = detrend(time_series, axis=0, type='linear')
        time_series = zscore(detrended, axis=0, ddof=1)
        corr = np.corrcoef(time_series.T)
        fc = corr[np.triu_indices(corr.shape[0], k=1)]
        if np.isnan(fc).any():
            continue
        np.savez_compressed(os.path.join(fc_addr, 'ABIDE', '{}/{}.npz'.format(atlas_name, prefix)), fc)
def get_fc_ADNI(atlas_name, atlas_loc, nii_addr = './nii_data', fc_addr = './fc_data'):
    """Compute one FC vector per entry of ``<nii_addr>/ADNI``.

    Every entry of the ADNI directory is expected to be a folder holding
    ``Filtered_4DVolume.nii``. The image is parcellated with the atlas
    (resampled onto the image with nearest-neighbour interpolation), the
    per-label mean signals are linearly detrended and z-scored, and the
    upper triangle (diagonal excluded) of their correlation matrix is saved
    to ``<fc_addr>/ADNI/<atlas_name>/<entry>.npz`` unless it contains NaN.

    Args:
        atlas_name: Name of the atlas; used as the output sub-directory.
        atlas_loc: Path of the atlas label image.
        nii_addr: Root directory of the NIfTI data.
        fc_addr: Root directory for the saved FC vectors.
    """
    os.makedirs(os.path.join(fc_addr, 'ADNI', atlas_name), exist_ok=True)
    source_dir = os.path.join(nii_addr, 'ADNI')
    for prefix in os.listdir(source_dir):
        rfmri_img = nib.load(os.path.join(source_dir, prefix, 'Filtered_4DVolume.nii'))
        volume = rfmri_img.get_fdata()
        # One row per voxel, one column per sample of the last image axis.
        X = volume.reshape(-1, volume.shape[-1])
        atlas = resample_to_img(nib.load(atlas_loc), rfmri_img, interpolation='nearest')
        labels = atlas.get_fdata().astype(int).flatten()
        roi_ids = np.sort(np.unique(labels[labels > 0]))
        roi_means = [X[labels == roi, :].mean(axis=0) for roi in roi_ids]
        time_series = np.stack(roi_means, axis=1)
        detrended = detrend(time_series, axis=0, type='linear')
        time_series = zscore(detrended, axis=0, ddof=1)
        corr = np.corrcoef(time_series.T)
        fc = corr[np.triu_indices(corr.shape[0], k=1)]
        if np.isnan(fc).any():
            continue
        np.savez_compressed(os.path.join(fc_addr, 'ADNI', '{}/{}.npz'.format(atlas_name, prefix)), fc)
def fc_stability(atlas_name, fc_addr = './fc_data'):
    """Return the mean within-subject similarity of resting-state FC vectors.

    For every subject listed in ``./docs/HCP_subjlist.txt`` that has more
    than one FC file in ``<fc_addr>/HCP_rfMRI/<atlas_name>`` (a file belongs
    to a subject when the subject id occurs in its name), the pairwise
    Pearson correlations between that subject's FC vectors are averaged.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with one mean correlation per subject that has at least
        two FC files, in subject-list order.
    """
    # atleast_1d: np.loadtxt yields a 0-d array for a one-line subject list,
    # which cannot be iterated.
    subjlist = np.atleast_1d(np.loadtxt('./docs/HCP_subjlist.txt', dtype=str))
    addr = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)
    all_files = os.listdir(addr)  # scanned once instead of twice per subject
    res_list = []
    for sub in subjlist:
        sub_files = [file for file in all_files if sub in file]
        if len(sub_files) <= 1:
            continue
        fc_stack = np.array([np.load(os.path.join(addr, file))['arr_0'] for file in sub_files])
        fc_corr = np.corrcoef(fc_stack)
        # Average over each unordered pair of scans once.
        res_list.append(np.mean(fc_corr[np.triu_indices(fc_corr.shape[0], k=1)]))
    return np.array(res_list)
def fingerprinting(atlas_name, fc_addr = './fc_data'):
    """Return per-subject identification accuracy from resting-state FC.

    For every subject in ``./docs/HCP_subjlist.txt`` that has at least one
    FC file in ``<fc_addr>/HCP_rfMRI/<atlas_name>`` (a file belongs to a
    subject when the subject id occurs in its name), the first file in
    sorted name order is the subject's reference and the remaining files
    are probes. A probe is identified correctly when the reference it
    correlates with most strongly belongs to the same subject.

    The directory listing is sorted so the reference/probe split does not
    depend on the file system's enumeration order.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with, for each subject having at least one probe, the
        fraction of its probes that were identified correctly.
    """
    addr = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)
    all_files = sorted(os.listdir(addr))
    # atleast_1d: np.loadtxt yields a 0-d array for a one-line subject list.
    subjlist = np.atleast_1d(np.loadtxt('./docs/HCP_subjlist.txt', dtype=str))

    # Pair every subject with its files once; drop subjects without data.
    candidates = [(sub, [file for file in all_files if sub in file]) for sub in subjlist]
    candidates = [(sub, files) for sub, files in candidates if files]
    subjects = [sub for sub, _ in candidates]
    reference = [np.load(os.path.join(addr, files[0]))['arr_0'] for _, files in candidates]

    res_list = []
    for sub, files in candidates:
        probes = files[1:]
        if not probes:
            continue
        count = 0
        for file in probes:
            fc = np.load(os.path.join(addr, file))['arr_0']
            similarity = np.array([np.corrcoef([fc, ref])[0, 1] for ref in reference])
            # argmax returns the first maximum, i.e. the earliest subject on ties.
            if subjects[int(np.argmax(similarity))] == sub:
                count += 1
        res_list.append(count / len(probes))
    return np.array(res_list)
def age_group_classification(atlas_name, fc_addr = './fc_data'):
    """Classify the ``Age_Group`` label from resting-state FC vectors.

    Subjects come from ``./docs/HCP_subjlist.txt`` and labels from
    ``./docs/behavior_HCP.csv``; subjects whose label is NaN are dropped.
    A stratified 10-fold split is drawn over subjects, so all FC vectors of
    a subject land on the same side of a split. Each fold standardises the
    features, reduces them to 100 principal components when there are more
    than 100 features, and fits a class-balanced linear SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy of each fold.
    """
    subject_ids = np.loadtxt('./docs/HCP_subjlist.txt', dtype=str)
    behavior = pd.read_csv('./docs/behavior_HCP.csv').to_dict(orient='list')
    fc_dir = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)

    kept_subjects, kept_labels = [], []
    features, labels, owners = [], [], []
    for sub in subject_ids:
        label = behavior['Age_Group'][behavior['Subject'].index(int(sub))]
        if np.isnan(label):
            continue
        kept_subjects.append(int(sub))
        kept_labels.append(label)
        for fname in os.listdir(fc_dir):
            if sub not in fname:
                continue
            features.append(np.load(os.path.join(fc_dir, fname))['arr_0'])
            labels.append(label)
            owners.append(int(sub))
    kept_subjects = np.array(kept_subjects)
    kept_labels = np.array(kept_labels)
    features = np.array(features)
    labels = np.array(labels)
    owners = np.array(owners)

    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    fold_acc = []
    for train_pos, test_pos in splitter.split(kept_subjects, kept_labels):
        # Map the subject-level split onto the individual FC vectors.
        in_train = np.isin(owners, kept_subjects[train_pos])
        in_test = np.isin(owners, kept_subjects[test_pos])
        X_train, y_train = features[in_train], labels[in_train]
        X_test, y_test = features[in_test], labels[in_test]

        scaler = StandardScaler()
        Z_train = scaler.fit_transform(X_train)
        Z_test = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            Z_train = pca.fit_transform(Z_train)
            Z_test = pca.transform(Z_test)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(Z_train, y_train)
        fold_acc.append(accuracy_score(y_test, clf.predict(Z_test)))
    return np.array(fold_acc)
def gender_classification(atlas_name, fc_addr = './fc_data'):
    """Classify the ``Gender`` label from resting-state FC vectors.

    Subjects come from ``./docs/HCP_subjlist.txt`` and labels from
    ``./docs/behavior_HCP.csv``; the label is 1 when the ``Gender`` entry
    equals ``'M'`` and 0 otherwise. Subjects whose ``Gender`` entry is
    missing are dropped. A stratified 10-fold split is drawn over subjects,
    so all FC vectors of a subject land on the same side of a split. Each
    fold standardises the features, reduces them to 100 principal
    components when there are more than 100 features, and fits a
    class-balanced linear SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy of each fold.
    """
    subjlist = np.loadtxt('./docs/HCP_subjlist.txt', dtype=str)
    behavior = pd.read_csv('./docs/behavior_HCP.csv').to_dict(orient='list')
    addr = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)

    sub_X, sub_y = [], []
    mat_X, mat_y, mat_master = [], [], []
    for sub in subjlist:
        raw_gender = behavior['Gender'][behavior['Subject'].index(int(sub))]
        # Test the raw entry: once encoded as int(... == 'M') a missing value
        # is indistinguishable from a valid 0 label.
        if pd.isna(raw_gender):
            continue
        target = int(raw_gender == 'M')
        sub_X.append(int(sub))
        sub_y.append(target)
        for file in os.listdir(addr):
            if sub in file:
                mat_X.append(np.load(os.path.join(addr, file))['arr_0'])
                mat_y.append(target)
                mat_master.append(int(sub))
    sub_X = np.array(sub_X)
    sub_y = np.array(sub_y)
    mat_X = np.array(mat_X)
    mat_y = np.array(mat_y)
    mat_master = np.array(mat_master)

    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    acc_list = []
    for trainsub_idx, testsub_idx in skf.split(sub_X, sub_y):
        # Map the subject-level split onto the individual FC vectors.
        trainmat_idx = np.isin(mat_master, sub_X[trainsub_idx])
        testmat_idx = np.isin(mat_master, sub_X[testsub_idx])
        X_train, X_test = mat_X[trainmat_idx], mat_X[testmat_idx]
        y_train, y_test = mat_y[trainmat_idx], mat_y[testmat_idx]

        scaler = StandardScaler()
        X_train_feat = scaler.fit_transform(X_train)
        X_test_feat = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            X_train_feat = pca.fit_transform(X_train_feat)
            X_test_feat = pca.transform(X_test_feat)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(X_train_feat, y_train)
        y_pred = clf.predict(X_test_feat)
        acc_list.append(accuracy_score(y_test, y_pred))
    return np.array(acc_list)
def fluid_intelligence(atlas_name, fc_addr = './fc_data'):
    """Classify the ``CogFluidComp_AgeAdj_Group`` label from resting-state FC.

    Subjects come from ``./docs/HCP_subjlist.txt`` and labels from
    ``./docs/behavior_HCP.csv``; subjects whose label is NaN are dropped.
    A stratified 10-fold split is drawn over subjects, so all FC vectors of
    a subject land on the same side of a split. Each fold standardises the
    features, reduces them to 100 principal components when there are more
    than 100 features, and fits a class-balanced linear SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy of each fold.
    """
    subject_ids = np.loadtxt('./docs/HCP_subjlist.txt', dtype=str)
    behavior = pd.read_csv('./docs/behavior_HCP.csv').to_dict(orient='list')
    fc_dir = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)

    kept_subjects, kept_labels = [], []
    features, labels, owners = [], [], []
    for sub in subject_ids:
        label = behavior['CogFluidComp_AgeAdj_Group'][behavior['Subject'].index(int(sub))]
        if np.isnan(label):
            continue
        kept_subjects.append(int(sub))
        kept_labels.append(label)
        for fname in os.listdir(fc_dir):
            if sub not in fname:
                continue
            features.append(np.load(os.path.join(fc_dir, fname))['arr_0'])
            labels.append(label)
            owners.append(int(sub))
    kept_subjects = np.array(kept_subjects)
    kept_labels = np.array(kept_labels)
    features = np.array(features)
    labels = np.array(labels)
    owners = np.array(owners)

    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    fold_acc = []
    for train_pos, test_pos in splitter.split(kept_subjects, kept_labels):
        # Map the subject-level split onto the individual FC vectors.
        in_train = np.isin(owners, kept_subjects[train_pos])
        in_test = np.isin(owners, kept_subjects[test_pos])
        X_train, y_train = features[in_train], labels[in_train]
        X_test, y_test = features[in_test], labels[in_test]

        scaler = StandardScaler()
        Z_train = scaler.fit_transform(X_train)
        Z_test = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            Z_train = pca.fit_transform(Z_train)
            Z_test = pca.transform(Z_test)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(Z_train, y_train)
        fold_acc.append(accuracy_score(y_test, clf.predict(Z_test)))
    return np.array(fold_acc)
def crystallized_intelligence(atlas_name, fc_addr = './fc_data'):
    """Classify the ``CogCrystalComp_AgeAdj_Group`` label from resting-state FC.

    Subjects come from ``./docs/HCP_subjlist.txt`` and labels from
    ``./docs/behavior_HCP.csv``; subjects whose label is NaN are dropped.
    A stratified 10-fold split is drawn over subjects, so all FC vectors of
    a subject land on the same side of a split. Each fold standardises the
    features, reduces them to 100 principal components when there are more
    than 100 features, and fits a class-balanced linear SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy of each fold.
    """
    subject_ids = np.loadtxt('./docs/HCP_subjlist.txt', dtype=str)
    behavior = pd.read_csv('./docs/behavior_HCP.csv').to_dict(orient='list')
    fc_dir = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)

    kept_subjects, kept_labels = [], []
    features, labels, owners = [], [], []
    for sub in subject_ids:
        label = behavior['CogCrystalComp_AgeAdj_Group'][behavior['Subject'].index(int(sub))]
        if np.isnan(label):
            continue
        kept_subjects.append(int(sub))
        kept_labels.append(label)
        for fname in os.listdir(fc_dir):
            if sub not in fname:
                continue
            features.append(np.load(os.path.join(fc_dir, fname))['arr_0'])
            labels.append(label)
            owners.append(int(sub))
    kept_subjects = np.array(kept_subjects)
    kept_labels = np.array(kept_labels)
    features = np.array(features)
    labels = np.array(labels)
    owners = np.array(owners)

    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    fold_acc = []
    for train_pos, test_pos in splitter.split(kept_subjects, kept_labels):
        # Map the subject-level split onto the individual FC vectors.
        in_train = np.isin(owners, kept_subjects[train_pos])
        in_test = np.isin(owners, kept_subjects[test_pos])
        X_train, y_train = features[in_train], labels[in_train]
        X_test, y_test = features[in_test], labels[in_test]

        scaler = StandardScaler()
        Z_train = scaler.fit_transform(X_train)
        Z_test = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            Z_train = pca.fit_transform(Z_train)
            Z_test = pca.transform(Z_test)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(Z_train, y_train)
        fold_acc.append(accuracy_score(y_test, clf.predict(Z_test)))
    return np.array(fold_acc)
def general_intelligence(atlas_name, fc_addr = './fc_data'):
    """Classify the ``CogTotalComp_AgeAdj_Group`` label from resting-state FC.

    Subjects come from ``./docs/HCP_subjlist.txt`` and labels from
    ``./docs/behavior_HCP.csv``; subjects whose label is NaN are dropped.
    A stratified 10-fold split is drawn over subjects, so all FC vectors of
    a subject land on the same side of a split. Each fold standardises the
    features, reduces them to 100 principal components when there are more
    than 100 features, and fits a class-balanced linear SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy of each fold.
    """
    subject_ids = np.loadtxt('./docs/HCP_subjlist.txt', dtype=str)
    behavior = pd.read_csv('./docs/behavior_HCP.csv').to_dict(orient='list')
    fc_dir = os.path.join(fc_addr, 'HCP_rfMRI', atlas_name)

    kept_subjects, kept_labels = [], []
    features, labels, owners = [], [], []
    for sub in subject_ids:
        label = behavior['CogTotalComp_AgeAdj_Group'][behavior['Subject'].index(int(sub))]
        if np.isnan(label):
            continue
        kept_subjects.append(int(sub))
        kept_labels.append(label)
        for fname in os.listdir(fc_dir):
            if sub not in fname:
                continue
            features.append(np.load(os.path.join(fc_dir, fname))['arr_0'])
            labels.append(label)
            owners.append(int(sub))
    kept_subjects = np.array(kept_subjects)
    kept_labels = np.array(kept_labels)
    features = np.array(features)
    labels = np.array(labels)
    owners = np.array(owners)

    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    fold_acc = []
    for train_pos, test_pos in splitter.split(kept_subjects, kept_labels):
        # Map the subject-level split onto the individual FC vectors.
        in_train = np.isin(owners, kept_subjects[train_pos])
        in_test = np.isin(owners, kept_subjects[test_pos])
        X_train, y_train = features[in_train], labels[in_train]
        X_test, y_test = features[in_test], labels[in_test]

        scaler = StandardScaler()
        Z_train = scaler.fit_transform(X_train)
        Z_test = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            Z_train = pca.fit_transform(Z_train)
            Z_test = pca.transform(Z_test)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(Z_train, y_train)
        fold_acc.append(accuracy_score(y_test, clf.predict(Z_test)))
    return np.array(fold_acc)
def cognitive_task_7way(atlas_name, fc_addr = './fc_data'):
    """Classify which of the seven HCP tasks a whole-run FC vector came from.

    FC vectors are read from ``<fc_addr>/HCP_tfMRI/<atlas_name>`` for the
    subjects in ``./docs/HCP_subjlist.txt``; the label is the position of
    the task in the task list. A shuffled 10-fold split is drawn over
    subjects, so all vectors of a subject land on the same side of a split.
    Each fold standardises the features, reduces them to 100 principal
    components when there are more than 100 features, and fits a
    class-balanced linear SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy of each fold.
    """
    subjlist = np.loadtxt('./docs/HCP_subjlist.txt', dtype=str)
    fc_dir = os.path.join(fc_addr, 'HCP_tfMRI', atlas_name)
    task_names = ['WM', 'GAMBLING', 'MOTOR', 'LANGUAGE', 'SOCIAL', 'RELATIONAL', 'EMOTION']

    features, labels, owners = [], [], []
    for sub in subjlist:
        for label, name in enumerate(task_names):
            fc_path = os.path.join(fc_dir, '{}_{}_LR.npz'.format(sub, name))
            if not os.path.exists(fc_path):
                continue
            features.append(np.load(fc_path)['arr_0'])
            labels.append(label)
            owners.append(sub)
    features = np.array(features)
    labels = np.array(labels)
    owners = np.array(owners)

    splitter = KFold(n_splits=10, shuffle=True, random_state=0)
    fold_acc = []
    for train_pos, test_pos in splitter.split(subjlist):
        # Map the subject-level split onto the individual FC vectors.
        in_train = np.isin(owners, subjlist[train_pos])
        in_test = np.isin(owners, subjlist[test_pos])
        X_train, y_train = features[in_train], labels[in_train]
        X_test, y_test = features[in_test], labels[in_test]

        scaler = StandardScaler()
        Z_train = scaler.fit_transform(X_train)
        Z_test = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            Z_train = pca.fit_transform(Z_train)
            Z_test = pca.transform(Z_test)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(Z_train, y_train)
        fold_acc.append(accuracy_score(y_test, clf.predict(Z_test)))
    return np.array(fold_acc)
def cognitive_task_24way(atlas_name, fc_addr = './fc_data'):
    """Classify which of the 24 task conditions an FC vector came from.

    Condition-level FC vectors are read from
    ``<fc_addr>/HCP_tfMRI/<atlas_name>`` for the subjects in
    ``./docs/HCP_subjlist.txt``; the label is the position of the condition
    in the condition table. A shuffled 10-fold split is drawn over
    subjects, so all vectors of a subject land on the same side of a split.
    Each fold standardises the features, reduces them to 100 principal
    components when there are more than 100 features, and fits a
    class-balanced linear SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy of each fold.
    """
    subjlist = np.loadtxt('./docs/HCP_subjlist.txt', dtype=str)
    fc_dir = os.path.join(fc_addr, 'HCP_tfMRI', atlas_name)
    # condition -> parent task; insertion order defines the class index.
    parent_task = {
        '0bk_body': 'WM', '0bk_faces': 'WM', '0bk_places': 'WM', '0bk_tools': 'WM',
        '2bk_body': 'WM', '2bk_faces': 'WM', '2bk_places': 'WM', '2bk_tools': 'WM',
        'win': 'GAMBLING', 'loss': 'GAMBLING',
        'cue': 'MOTOR', 'lf': 'MOTOR', 'rf': 'MOTOR', 'lh': 'MOTOR', 'rh': 'MOTOR', 't': 'MOTOR',
        'story': 'LANGUAGE', 'math': 'LANGUAGE',
        'mental': 'SOCIAL', 'rnd': 'SOCIAL',
        'relation': 'RELATIONAL', 'match': 'RELATIONAL',
        'fear': 'EMOTION', 'neut': 'EMOTION',
    }

    features, labels, owners = [], [], []
    for sub in subjlist:
        for label, (condition, parent) in enumerate(parent_task.items()):
            fc_path = os.path.join(fc_dir, '{}_{}_LR_{}.npz'.format(sub, parent, condition))
            if not os.path.exists(fc_path):
                continue
            features.append(np.load(fc_path)['arr_0'])
            labels.append(label)
            owners.append(sub)
    features = np.array(features)
    labels = np.array(labels)
    owners = np.array(owners)

    splitter = KFold(n_splits=10, shuffle=True, random_state=0)
    fold_acc = []
    for train_pos, test_pos in splitter.split(subjlist):
        # Map the subject-level split onto the individual FC vectors.
        in_train = np.isin(owners, subjlist[train_pos])
        in_test = np.isin(owners, subjlist[test_pos])
        X_train, y_train = features[in_train], labels[in_train]
        X_test, y_test = features[in_test], labels[in_test]

        scaler = StandardScaler()
        Z_train = scaler.fit_transform(X_train)
        Z_test = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            Z_train = pca.fit_transform(Z_train)
            Z_test = pca.transform(Z_test)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(Z_train, y_train)
        fold_acc.append(accuracy_score(y_test, clf.predict(Z_test)))
    return np.array(fold_acc)
def autism_diagnosis(atlas_name, fc_addr = './fc_data'):
    """Classify the ABIDE ``DX_GROUP`` label from FC vectors.

    Every file in ``<fc_addr>/ABIDE/<atlas_name>`` is matched to a row of
    ``./docs/behavior_ABIDE.csv`` through ``FILE_ID`` (file name without its
    4-character extension); files whose label is NaN are dropped. A
    stratified 10-fold split is drawn over the FC vectors. Each fold
    standardises the features, reduces them to 100 principal components
    when there are more than 100 features, and fits a class-balanced linear
    SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy of each fold.
    """
    behavior = pd.read_csv('./docs/behavior_ABIDE.csv').to_dict(orient='list')
    fc_dir = os.path.join(fc_addr, 'ABIDE', atlas_name)

    features, labels = [], []
    for fname in os.listdir(fc_dir):
        row = behavior['FILE_ID'].index(fname[:-4])
        label = behavior['DX_GROUP'][row]
        if np.isnan(label):
            continue
        features.append(np.load(os.path.join(fc_dir, fname))['arr_0'])
        labels.append(label)
    features = np.array(features)
    labels = np.array(labels)

    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    fold_acc = []
    for train_pos, test_pos in splitter.split(features, labels):
        X_train, y_train = features[train_pos], labels[train_pos]
        X_test, y_test = features[test_pos], labels[test_pos]

        scaler = StandardScaler()
        Z_train = scaler.fit_transform(X_train)
        Z_test = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            Z_train = pca.fit_transform(Z_train)
            Z_test = pca.transform(Z_test)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(Z_train, y_train)
        fold_acc.append(accuracy_score(y_test, clf.predict(Z_test)))
    return np.array(fold_acc)
def autism_cross_site(atlas_name, fc_addr = './fc_data'):
    """Classify the ABIDE ``DX_GROUP`` label with leave-one-site-out testing.

    Every file in ``<fc_addr>/ABIDE/<atlas_name>`` is matched to a row of
    ``./docs/behavior_ABIDE.csv`` through ``FILE_ID`` (file name without its
    4-character extension); files whose label is NaN are dropped. For each
    distinct ``SITE_ID`` the model is trained on all other sites and tested
    on that site. Each round standardises the features, reduces them to 100
    principal components when there are more than 100 features, and fits a
    class-balanced linear SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy for each held-out site, in the
        sorted order returned by ``np.unique``.
    """
    behavior = pd.read_csv('./docs/behavior_ABIDE.csv').to_dict(orient='list')
    fc_dir = os.path.join(fc_addr, 'ABIDE', atlas_name)

    features, labels, sites = [], [], []
    for fname in os.listdir(fc_dir):
        row = behavior['FILE_ID'].index(fname[:-4])
        label = behavior['DX_GROUP'][row]
        if np.isnan(label):
            continue
        features.append(np.load(os.path.join(fc_dir, fname))['arr_0'])
        labels.append(label)
        sites.append(behavior['SITE_ID'][row])
    features = np.array(features)
    labels = np.array(labels)
    sites = np.array(sites, dtype=str)

    site_acc = []
    for holdout_site in np.unique(sites):
        in_test = np.array([site == holdout_site for site in sites])
        in_train = np.logical_not(in_test)
        X_train, y_train = features[in_train], labels[in_train]
        X_test, y_test = features[in_test], labels[in_test]

        scaler = StandardScaler()
        Z_train = scaler.fit_transform(X_train)
        Z_test = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            Z_train = pca.fit_transform(Z_train)
            Z_test = pca.transform(Z_test)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(Z_train, y_train)
        site_acc.append(accuracy_score(y_test, clf.predict(Z_test)))
    return np.array(site_acc)
def AD_diagnosis(atlas_name, fc_addr = './fc_data'):
    """Classify the ADNI ``Research Group`` label from FC vectors.

    Every file in ``<fc_addr>/ADNI/<atlas_name>`` is matched to a row of
    ``./docs/behavior_ADNI.csv`` through ``Subject ID`` (file name without
    its 4-character extension). ``'CN'`` maps to class 0, ``'MCI'`` to
    class 1 and every other value to class 2. A stratified 10-fold split is
    drawn over the FC vectors. Each fold standardises the features, reduces
    them to 100 principal components when there are more than 100 features,
    and fits a class-balanced linear SVM.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.

    Returns:
        1-D array with the test accuracy of each fold.
    """
    behavior = pd.read_csv('./docs/behavior_ADNI.csv').to_dict(orient='list')
    fc_dir = os.path.join(fc_addr, 'ADNI', atlas_name)
    # Anything that is neither 'CN' nor 'MCI' falls into class 2.
    # NOTE(review): presumably class 2 is meant to be AD only - confirm.
    class_of = {'CN': 0, 'MCI': 1}

    features, labels = [], []
    for fname in os.listdir(fc_dir):
        group = behavior['Research Group'][behavior['Subject ID'].index(fname[:-4])]
        features.append(np.load(os.path.join(fc_dir, fname))['arr_0'])
        labels.append(class_of.get(group, 2))
    features = np.array(features)
    labels = np.array(labels)

    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    fold_acc = []
    for train_pos, test_pos in splitter.split(features, labels):
        X_train, y_train = features[train_pos], labels[train_pos]
        X_test, y_test = features[test_pos], labels[test_pos]

        scaler = StandardScaler()
        Z_train = scaler.fit_transform(X_train)
        Z_test = scaler.transform(X_test)
        if X_train.shape[1] > 100:
            pca = PCA(n_components=100, random_state=0)
            Z_train = pca.fit_transform(Z_train)
            Z_test = pca.transform(Z_test)

        clf = SVC(kernel='linear', class_weight='balanced')
        clf.fit(Z_train, y_train)
        fold_acc.append(accuracy_score(y_test, clf.predict(Z_test)))
    return np.array(fold_acc)
def downstream_all(atlas_name, fc_addr = './fc_data'):
    """Run every downstream evaluation for an atlas and print a report.

    A header line is printed first; then each evaluation is run in turn and
    its scores are summarised as ``mean±std`` with three decimals.

    Args:
        atlas_name: Name of the atlas sub-directory holding the FC files.
        fc_addr: Root directory of the saved FC vectors.
    """
    print('--- {} downstream report ---'.format(atlas_name))
    # (evaluation, report line template), in the order they are reported.
    report = (
        (gender_classification, 'Gender classification: {:.3f}±{:.3f}'),
        (fluid_intelligence, 'Fluid intelligence: {:.3f}±{:.3f}'),
        (cognitive_task_7way, 'Cognitive task (7-way): {:.3f}±{:.3f}'),
        (cognitive_task_24way, 'Cognitive task (24-way): {:.3f}±{:.3f}'),
        (autism_diagnosis, 'Autism diagnosis: {:.3f}±{:.3f}'),
        (AD_diagnosis, 'AD diagnosis: {:.3f}±{:.3f}'),
        (fc_stability, 'FC stability: {:.3f}±{:.3f}'),
        (fingerprinting, 'Fingerprinting: {:.3f}±{:.3f}'),
        (age_group_classification, 'Age group classification: {:.3f}±{:.3f}'),
        (crystallized_intelligence, 'Crystallized intelligence: {:.3f}±{:.3f}'),
        (general_intelligence, 'General intelligence: {:.3f}±{:.3f}'),
        (autism_cross_site, 'Autism cross-site: {:.3f}±{:.3f}'),
    )
    for evaluate, template in report:
        scores = evaluate(atlas_name, fc_addr)
        print(template.format(np.mean(scores), np.std(scores)))