# Joblib
# ynuozhang
# update code
# baf3373
############################ Training Classifier ############################
# Runs train_ml.py for one OBJECTIVE / MODEL_TYPE / WT combination and sends
# everything it prints (stdout and stderr) to a dated log file in LOG_LOC.
#
# HOME_LOC: "x" is presumably a placeholder for the project root -- set it
# before running. Use an absolute path: every path below is derived from it
# and is still used after the `cd` into SCRIPT_LOC.
HOME_LOC="x"
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='hemolysis' # nf/solubility/hemolysis
WT='smiles' # wt/smiles
DATA_FILE="hemo_${WT}_with_embeddings"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d) # month_day prefix for the log file name
MODEL_TYPE='svm_gpu' # xgb/enet_gpu/svm_gpu
SPECIAL_PREFIX="${MODEL_TYPE}-${OBJECTIVE}-${WT}_new"
# Create log directory if it doesn't exist; without it the output redirection
# below fails and python is never started.
mkdir -p "${LOG_LOC}" || {
  printf 'Cannot create log directory: %s\n' "${LOG_LOC}" >&2
  exit 1
}
# train_ml.py is addressed relative to SCRIPT_LOC, so stop if we cannot enter it.
cd "${SCRIPT_LOC}" || {
  printf 'Cannot cd into: %s\n' "${SCRIPT_LOC}" >&2
  exit 1
}
python -u train_ml.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/${MODEL_TYPE}_${WT}_new" \
  --model "${MODEL_TYPE}" \
  --n_trials 20 > "${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}.log" 2>&1
echo "Script completed at $(date)"
############################ Training Classifier (NN) ############################
# Runs train_nn.py for one OBJECTIVE / MODEL_TYPE / WT combination and sends
# everything it prints (stdout and stderr) to a dated log file in LOG_LOC.
#
# HOME_LOC: "x" is presumably a placeholder for the project root -- set it
# before running. Use an absolute path: every path below is derived from it
# and is still used after the `cd` into SCRIPT_LOC.
HOME_LOC="x"
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='hemolysis' # nf/solubility/hemolysis
WT='smiles' #wt/smiles
# NOTE(review): OBJECTIVE is 'hemolysis' but the file name below uses the
# 'nf_' prefix, so the dataset is looked up at hemolysis/nf_... -- confirm
# which of the two values is the intended one.
DATA_FILE="nf_${WT}_with_embeddings_unpooled"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d) # month_day prefix for the log file name
MODEL_TYPE='cnn' #mlp/cnn/transformer
SPECIAL_PREFIX="${MODEL_TYPE}-${OBJECTIVE}-${WT}"
# Create log directory if it doesn't exist; without it the output redirection
# below fails and python is never started.
mkdir -p "${LOG_LOC}" || {
  printf 'Cannot create log directory: %s\n' "${LOG_LOC}" >&2
  exit 1
}
# train_nn.py is addressed relative to SCRIPT_LOC, so stop if we cannot enter it.
cd "${SCRIPT_LOC}" || {
  printf 'Cannot cd into: %s\n' "${SCRIPT_LOC}" >&2
  exit 1
}
python -u train_nn.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/${MODEL_TYPE}_${WT}_20" \
  --model "${MODEL_TYPE}" \
  --n_trials 20 > "${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}_20.log" 2>&1
echo "Script completed at $(date)"
############################ Training Regressor ############################
# Runs train_ml_regression.py for one OBJECTIVE / MODEL_TYPE / WT combination
# and sends everything it prints (stdout and stderr) to a dated log file in
# LOG_LOC.
#
# HOME_LOC: "x" is presumably a placeholder for the project root -- set it
# before running. Use an absolute path: every path below is derived from it
# and is still used after the `cd` into SCRIPT_LOC.
HOME_LOC="x"
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='permeability_pampa' # permeability_pampa/permeability_caco2
WT='smiles' # wt/smiles
DATA_FILE="pampa_${WT}_with_embeddings"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d) # month_day prefix for the log file name
MODEL_TYPE='svr' # xgb_reg/enet_gpu/svr
SPECIAL_PREFIX="${MODEL_TYPE}-${OBJECTIVE}-${WT}"
# Create log directory if it doesn't exist; without it the output redirection
# below fails and python is never started.
mkdir -p "${LOG_LOC}" || {
  printf 'Cannot create log directory: %s\n' "${LOG_LOC}" >&2
  exit 1
}
# The training script is addressed relative to SCRIPT_LOC, so stop if we
# cannot enter it.
cd "${SCRIPT_LOC}" || {
  printf 'Cannot cd into: %s\n' "${SCRIPT_LOC}" >&2
  exit 1
}
python -u train_ml_regression.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/${MODEL_TYPE}_${WT}10" \
  --model "${MODEL_TYPE}" \
  --n_trials 10 > "${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}10.log" 2>&1
echo "Script completed at $(date)"
############################ Training Regressor (NN) ############################
# Runs train_nn_regression.py for one OBJECTIVE / MODEL_TYPE / WT combination
# and sends everything it prints (stdout and stderr) to a dated log file in
# LOG_LOC.
#
# HOME_LOC: "x" is presumably a placeholder for the project root -- set it
# before running. Use an absolute path: every path below is derived from it
# and is still used after the `cd` into SCRIPT_LOC.
HOME_LOC="x"
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='permeability_caco2' # permeability_pampa/permeability_caco2
WT='smiles' # wt/smiles
DATA_FILE="caco2_${WT}_with_embeddings_unpooled"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d) # month_day prefix for the log file name
MODEL_TYPE='mlp' #mlp/cnn/transformer
SPECIAL_PREFIX="${MODEL_TYPE}-${OBJECTIVE}-${WT}"
# Create log directory if it doesn't exist; without it the output redirection
# below fails and python is never started.
mkdir -p "${LOG_LOC}" || {
  printf 'Cannot create log directory: %s\n' "${LOG_LOC}" >&2
  exit 1
}
# The training script is addressed relative to SCRIPT_LOC, so stop if we
# cannot enter it.
cd "${SCRIPT_LOC}" || {
  printf 'Cannot cd into: %s\n' "${SCRIPT_LOC}" >&2
  exit 1
}
python -u train_nn_regression.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/${MODEL_TYPE}_${WT}" \
  --model "${MODEL_TYPE}" \
  --n_trials 200 > "${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}.log" 2>&1
echo "Script completed at $(date)"
############################ Training Binding Affinity Predictor ############################
# Runs binding_training.py in the mode given by STATUS and sends everything
# it prints (stdout and stderr) to a dated log file in LOG_LOC.
#
# HOME_LOC: "x" is presumably a placeholder for the project root -- set it
# before running. Use an absolute path: every path below is derived from it
# and is still used after the `cd` into SCRIPT_LOC.
HOME_LOC="x"
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='binding_affinity'
WT='smiles' #wt/smiles
STATUS='pooled' #pooled/unpooled
# STATUS selects both the dataset file and the --mode passed to the trainer.
DATA_FILE="pair_wt_${WT}_${STATUS}"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d) # month_day prefix for the log file name
SPECIAL_PREFIX="${OBJECTIVE}-${WT}-${STATUS}"
# Create log directory if it doesn't exist; without it the output redirection
# below fails and python is never started.
mkdir -p "${LOG_LOC}" || {
  printf 'Cannot create log directory: %s\n' "${LOG_LOC}" >&2
  exit 1
}
# binding_training.py is addressed relative to SCRIPT_LOC, so stop if we
# cannot enter it.
cd "${SCRIPT_LOC}" || {
  printf 'Cannot cd into: %s\n' "${SCRIPT_LOC}" >&2
  exit 1
}
python -u binding_training.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --mode "${STATUS}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/wt_${WT}_${STATUS}" \
  --n_trials 200 > "${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}.log" 2>&1
echo "Script completed at $(date)"