Joblib
File size: 4,140 Bytes
baf3373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
############################  Training Classifier ############################
# Runs train_ml.py from SCRIPT_LOC for the selected OBJECTIVE / WT / MODEL_TYPE.
# The trainer's stdout and stderr are redirected to a dated log file under
# LOG_LOC; only the final status line below reaches the terminal.

HOME_LOC="x" # NOTE(review): "x" looks like a placeholder path; set it before running
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='hemolysis' # nf/solubility/hemolysis
WT='smiles' # wt/smiles
DATA_FILE="hemo_${WT}_with_embeddings"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d)
MODEL_TYPE='svm_gpu' # xgb/enet_gpu/svm_gpu
SPECIAL_PREFIX="${MODEL_TYPE}-${OBJECTIVE}-${WT}_new"
LOG_FILE="${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}.log"

# Create log directory if it doesn't exist; without it the output redirection
# below fails before the trainer is even started.
mkdir -p "${LOG_LOC}"

# Stop here instead of launching the trainer from whatever directory we are in.
cd "${SCRIPT_LOC}" || {
  echo "Cannot cd to ${SCRIPT_LOC}" >&2
  exit 1
}

if python -u train_ml.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/${MODEL_TYPE}_${WT}_new" \
  --model "${MODEL_TYPE}" \
  --n_trials 20 > "${LOG_FILE}" 2>&1; then
  echo "Script completed at $(date)"
else
  # $? still holds the exit status of the python command tested above.
  echo "train_ml.py failed with exit status $? at $(date); see ${LOG_FILE}" >&2
fi

############################  Training Classifier (NN) ############################
# Runs train_nn.py from SCRIPT_LOC for the selected OBJECTIVE / WT / MODEL_TYPE.
# The trainer's stdout and stderr are redirected to a dated log file under
# LOG_LOC; only the final status line below reaches the terminal.

HOME_LOC="x" # NOTE(review): "x" looks like a placeholder path; set it before running
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='hemolysis' # nf/solubility/hemolysis
WT='smiles' #wt/smiles
# NOTE(review): OBJECTIVE is 'hemolysis' but this file name carries an 'nf_'
# prefix, so the dataset path resolves to
# .../hemolysis/nf_${WT}_with_embeddings_unpooled. Confirm that this pairing
# is intended and not a leftover from a previous run.
DATA_FILE="nf_${WT}_with_embeddings_unpooled"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d)
MODEL_TYPE='cnn' #mlp/cnn/transformer
SPECIAL_PREFIX="${MODEL_TYPE}-${OBJECTIVE}-${WT}"
LOG_FILE="${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}_20.log"

# Create log directory if it doesn't exist
mkdir -p "${LOG_LOC}"

# Stop here instead of launching the trainer from whatever directory we are in.
cd "${SCRIPT_LOC}" || {
  echo "Cannot cd to ${SCRIPT_LOC}" >&2
  exit 1
}

if python -u train_nn.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/${MODEL_TYPE}_${WT}_20" \
  --model "${MODEL_TYPE}" \
  --n_trials 20 > "${LOG_FILE}" 2>&1; then
  echo "Script completed at $(date)"
else
  # $? still holds the exit status of the python command tested above.
  echo "train_nn.py failed with exit status $? at $(date); see ${LOG_FILE}" >&2
fi

############################  Training Regressor ############################
# Runs train_ml_regression.py from SCRIPT_LOC for the selected OBJECTIVE / WT /
# MODEL_TYPE. The trainer's stdout and stderr are redirected to a dated log
# file under LOG_LOC; only the final status line below reaches the terminal.

HOME_LOC="x" # NOTE(review): "x" looks like a placeholder path; set it before running
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='permeability_pampa' # permeability_pampa/permeability_caco2
WT='smiles' # wt/smiles
DATA_FILE="pampa_${WT}_with_embeddings"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d)
MODEL_TYPE='svr' # xgb_reg/enet_gpu/svr
SPECIAL_PREFIX="${MODEL_TYPE}-${OBJECTIVE}-${WT}"
LOG_FILE="${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}10.log"

# Create log directory if it doesn't exist
mkdir -p "${LOG_LOC}"

# Stop here instead of launching the trainer from whatever directory we are in.
cd "${SCRIPT_LOC}" || {
  echo "Cannot cd to ${SCRIPT_LOC}" >&2
  exit 1
}

if python -u train_ml_regression.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/${MODEL_TYPE}_${WT}10" \
  --model "${MODEL_TYPE}" \
  --n_trials 10 > "${LOG_FILE}" 2>&1; then
  echo "Script completed at $(date)"
else
  # $? still holds the exit status of the python command tested above.
  echo "train_ml_regression.py failed with exit status $? at $(date); see ${LOG_FILE}" >&2
fi

############################  Training Regressor (NN) ############################
# Runs train_nn_regression.py from SCRIPT_LOC for the selected OBJECTIVE / WT /
# MODEL_TYPE. The trainer's stdout and stderr are redirected to a dated log
# file under LOG_LOC; only the final status line below reaches the terminal.

HOME_LOC="x" # NOTE(review): "x" looks like a placeholder path; set it before running
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='permeability_caco2' # permeability_pampa/permeability_caco2
WT='smiles' # wt/smiles
DATA_FILE="caco2_${WT}_with_embeddings_unpooled"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d)
MODEL_TYPE='mlp' #mlp/cnn/transformer
SPECIAL_PREFIX="${MODEL_TYPE}-${OBJECTIVE}-${WT}"
LOG_FILE="${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}.log"

# Create log directory if it doesn't exist
mkdir -p "${LOG_LOC}"

# Stop here instead of launching the trainer from whatever directory we are in.
cd "${SCRIPT_LOC}" || {
  echo "Cannot cd to ${SCRIPT_LOC}" >&2
  exit 1
}

if python -u train_nn_regression.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/${MODEL_TYPE}_${WT}" \
  --model "${MODEL_TYPE}" \
  --n_trials 200 > "${LOG_FILE}" 2>&1; then
  echo "Script completed at $(date)"
else
  # $? still holds the exit status of the python command tested above.
  echo "train_nn_regression.py failed with exit status $? at $(date); see ${LOG_FILE}" >&2
fi

############################  Training Binding Affinity Predictor ############################
# Runs binding_training.py from SCRIPT_LOC, passing STATUS as --mode and using
# it to select both the dataset file and the output directory. The trainer's
# stdout and stderr are redirected to a dated log file under LOG_LOC; only the
# final status line below reaches the terminal.

HOME_LOC="x" # NOTE(review): "x" looks like a placeholder path; set it before running
SCRIPT_LOC="${HOME_LOC}/Classifier_Weight/training_classifiers"
DATA_LOC="${HOME_LOC}/Classifier_Weight/training_data_cleaned"
OBJECTIVE='binding_affinity'
WT='smiles' #wt/smiles
STATUS='pooled' #pooled/unpooled
DATA_FILE="pair_wt_${WT}_${STATUS}"
LOG_LOC="${SCRIPT_LOC}/src/logs"
DATE=$(date +%m_%d)
SPECIAL_PREFIX="${OBJECTIVE}-${WT}-${STATUS}"
LOG_FILE="${LOG_LOC}/${DATE}_${SPECIAL_PREFIX}.log"

# Create log directory if it doesn't exist
mkdir -p "${LOG_LOC}"

# Stop here instead of launching the trainer from whatever directory we are in.
cd "${SCRIPT_LOC}" || {
  echo "Cannot cd to ${SCRIPT_LOC}" >&2
  exit 1
}

if python -u binding_training.py \
  --dataset_path "${DATA_LOC}/${OBJECTIVE}/${DATA_FILE}" \
  --mode "${STATUS}" \
  --out_dir "${SCRIPT_LOC}/${OBJECTIVE}/wt_${WT}_${STATUS}" \
  --n_trials 200 > "${LOG_FILE}" 2>&1; then
  echo "Script completed at $(date)"
else
  # $? still holds the exit status of the python command tested above.
  echo "binding_training.py failed with exit status $? at $(date); see ${LOG_FILE}" >&2
fi