Upload weights and training details
Browse files- .gitattributes +1 -0
- complete_no_valid_oversample/EpiLaP/0f8e5eb996114868a17057bebe64f87c/checkpoints/epoch=299-step=300.ckpt +3 -0
- complete_no_valid_oversample/best_checkpoint.list +1 -0
- complete_no_valid_oversample/launch_script_NN-dfreezev2.1-assay_epiclass-complete_no_valid-oversample-100kb_all_none-job21513819.sh +202 -0
- complete_no_valid_oversample/output_job21513819_NN-dfreezev2.1-assay_epiclass-complete_no_valid-oversample-100kb_all_none_1695753894.e +101 -0
- complete_no_valid_oversample/output_job21513819_NN-dfreezev2.1-assay_epiclass-complete_no_valid-oversample-100kb_all_none_1695753894.o +118 -0
- complete_no_valid_oversample/split0_training_2023-09-26_18-46-35.md5 +0 -0
- complete_no_valid_oversample/train_confusion_matrix.csv +12 -0
- complete_no_valid_oversample/train_confusion_matrix.png +3 -0
- complete_no_valid_oversample/train_confusion_matrix_relative.csv +12 -0
- complete_no_valid_oversample/training_mapping.tsv +11 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
complete_no_valid_oversample/train_confusion_matrix.png filter=lfs diff=lfs merge=lfs -text
|
complete_no_valid_oversample/EpiLaP/0f8e5eb996114868a17057bebe64f87c/checkpoints/epoch=299-step=300.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a59cb704552f1fe009c05780878c487e125d021d250b886afcd38deaad2a44ce
|
| 3 |
+
size 1091995966
|
complete_no_valid_oversample/best_checkpoint.list
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
/lustre07/scratch/rabyj/recount3/epiclass_models/hg38_100kb_all_none/assay_epiclass_1l_3000n/11c/complete_no_valid_oversample/EpiLaP/0f8e5eb996114868a17057bebe64f87c/checkpoints/epoch=299-step=300.ckpt
|
complete_no_valid_oversample/launch_script_NN-dfreezev2.1-assay_epiclass-complete_no_valid-oversample-100kb_all_none-job21513819.sh
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
#SBATCH --time=6:00:00
|
| 3 |
+
#SBATCH --account=def-jacquesp
|
| 4 |
+
#SBATCH --job-name=NN-dfreezev2.1-assay_epiclass-complete_no_valid-oversample-100kb_all_none
|
| 5 |
+
#SBATCH --output=/lustre06/project/6007017/rabyj/epilap/output/sub/slurm_files/%x-job%j.out
|
| 6 |
+
#SBATCH --nodes=1
|
| 7 |
+
#SBATCH --gres=gpu:1
|
| 8 |
+
#SBATCH --mem=64G
|
| 9 |
+
#SBATCH --mail-user=joanny.raby@usherbrooke.ca
|
| 10 |
+
#SBATCH --mail-type=END,FAIL
|
| 11 |
+
|
| 12 |
+
# shellcheck disable=SC1091 # Don't warn about sourcing unreachable files
|
| 13 |
+
|
| 14 |
+
export PYTHONUNBUFFERED=TRUE
|
| 15 |
+
|
| 16 |
+
if [[ -n "$SLURM_JOB_ID" ]];
|
| 17 |
+
then
|
| 18 |
+
echo "print =========================================="
|
| 19 |
+
echo "print SLURM_JOB_ID = $SLURM_JOB_ID"
|
| 20 |
+
echo "print SLURM_JOB_NODELIST = $SLURM_JOB_NODELIST"
|
| 21 |
+
echo "print =========================================="
|
| 22 |
+
fi
|
| 23 |
+
|
| 24 |
+
gen_path="/lustre06/project/6007017/rabyj"
|
| 25 |
+
input_path="${gen_path}/epilap/input"
|
| 26 |
+
output_path="${gen_path}/epilap/output/logs"
|
| 27 |
+
gen_program_path="${gen_path}/sources/epi_ml"
|
| 28 |
+
program_path="${gen_program_path}/src/python/epi_ml"
|
| 29 |
+
|
| 30 |
+
slurm_out_folder="${gen_path}/epilap/output/sub/slurm_files"
|
| 31 |
+
|
| 32 |
+
# --- use correct environment ---
|
| 33 |
+
|
| 34 |
+
set -e
|
| 35 |
+
if [[ -n "$SLURM_JOB_ID" ]];
|
| 36 |
+
then
|
| 37 |
+
cd "$SLURM_TMPDIR"  # quoted so the path is never word-split or glob-expanded
|
| 38 |
+
bash "${gen_program_path}/src/bash_utils/setup_venv.sh" -r "${gen_program_path}/requirements/minimal_requirements.txt" -s "${gen_program_path}/src/python" &> "${slurm_out_folder}/${SLURM_JOB_ID}_setup.log"  # run the venv setup script; stdout+stderr go to the per-job setup log
|
| 39 |
+
source epiclass_env/bin/activate
|
| 40 |
+
else
|
| 41 |
+
source /lustre07/scratch/rabyj/envs/epiclass/bin/activate
|
| 42 |
+
fi
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# --- choose category + hparams + source files ---
|
| 46 |
+
|
| 47 |
+
# MODIFY THINGS HERE
|
| 48 |
+
|
| 49 |
+
# RESTORE="--restore" # COMMENT IF TRAINING # IMPORTANT
|
| 50 |
+
NO_VALID="hell yeah" # COMMENT IF 10fold TRAINING # IMPORTANT
|
| 51 |
+
|
| 52 |
+
export MAX_SPLIT="69" # IMPORTANT
|
| 53 |
+
category="assay_epiclass" # IMPORTANT
|
| 54 |
+
|
| 55 |
+
if [[ "$category" == "assay_epiclass" ]]; then
|
| 56 |
+
export ASSAY_LIST='["h3k27ac", "h3k27me3", "h3k36me3", "h3k4me1", "h3k4me3", "h3k9me3", "input", "rna_seq", "mrna_seq", "wgbs-standard", "wgbs-pbat"]' # as json
|
| 57 |
+
elif [[ "$category" == "harmonized_donor_sex" ]]; then
|
| 58 |
+
export LABEL_LIST='["female", "male"]'
|
| 59 |
+
fi
|
| 60 |
+
|
| 61 |
+
export EXCLUDE_LIST='["other", "--", "NA", ""]'
|
| 62 |
+
export MIN_CLASS_SIZE="10" # IMPORTANT
|
| 63 |
+
|
| 64 |
+
hparams="human_no_valid_oversample" # IMPORTANT
|
| 65 |
+
|
| 66 |
+
release="epiatlas-dfreeze-v2.1"
|
| 67 |
+
assembly="hg38"
|
| 68 |
+
resolution="100kb" # IMPORTANT
|
| 69 |
+
|
| 70 |
+
basename="${resolution}_all_none" # IMPORTANT
|
| 71 |
+
list_name="${basename}_dfreeze_filterCtl_plus_4ctl" # IMPORTANT
|
| 72 |
+
|
| 73 |
+
dataset=${assembly}"_"${release} # ex: hg38_2018-10
|
| 74 |
+
|
| 75 |
+
echo $dataset
|
| 76 |
+
|
| 77 |
+
export LAYER_SIZE="3000" # IMPORTANT
|
| 78 |
+
export NB_LAYER="1"
|
| 79 |
+
|
| 80 |
+
log="${output_path}/${release}/${assembly}_${basename}/${category}_${NB_LAYER}l_${LAYER_SIZE}n" # IMPORTANT# IMPORTANT# IMPORTANT# IMPORTANT
|
| 81 |
+
log="${log}/11c/complete_no_valid_oversample" # IMPORTANT
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# --- Creating correct paths for programs/launching ---
|
| 85 |
+
|
| 86 |
+
timestamp=$(date +%s)
|
| 87 |
+
|
| 88 |
+
hparams="${input_path}/hparams/${hparams}.json"
|
| 89 |
+
hdf5_list="${input_path}/hdf5_list/hg38_epiatlas-freeze-v2/${list_name}.list"
|
| 90 |
+
chroms="${input_path}/chromsizes/hg38.noy.chrom.sizes"
|
| 91 |
+
metadata="${input_path}/metadata/dfreeze-v2/hg38_2023-epiatlas-dfreeze-pospurge-nodup_filterCtl.json"
|
| 92 |
+
out1="${log}/output_job${SLURM_JOB_ID}_${SLURM_JOB_NAME}_${timestamp}.o"
|
| 93 |
+
out2="${log}/output_job${SLURM_JOB_ID}_${SLURM_JOB_NAME}_${timestamp}.e"
|
| 94 |
+
|
| 95 |
+
set -e
|
| 96 |
+
echo "Input arguments:"
|
| 97 |
+
for var in $hparams $hdf5_list $chroms $metadata
|
| 98 |
+
do
|
| 99 |
+
ls $var
|
| 100 |
+
done
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# --- Pre-checks ---
|
| 104 |
+
|
| 105 |
+
cd "${program_path}"  # quoted so the path is never word-split or glob-expanded
|
| 106 |
+
|
| 107 |
+
printf '\n%s\n' "Launching following command"
|
| 108 |
+
printf '%s\n' "python ${program_path}/utils/check_dir.py ${log}"
|
| 109 |
+
python "${program_path}/utils/check_dir.py" "${log}"  # quoted: each path is passed as exactly one argument
|
| 110 |
+
|
| 111 |
+
printf '\n%s\n' "Launching following command"
|
| 112 |
+
printf '%s\n' "python ${program_path}/utils/preconditions.py -m ${metadata}"
|
| 113 |
+
python "${program_path}/utils/preconditions.py" -m "${metadata}"  # quoted: each path is passed as exactly one argument
|
| 114 |
+
|
| 115 |
+
# Preconditions passed, copy launch script to log dir.
|
| 116 |
+
if [[ -n "$SLURM_JOB_ID" ]];
|
| 117 |
+
then
|
| 118 |
+
scontrol write batch_script "${SLURM_JOB_ID}" "${log}/launch_script_${SLURM_JOB_NAME}-job${SLURM_JOB_ID}.sh"  # writes the submitted batch script into the log dir
|
| 119 |
+
fi
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
# --- Transfer files to node scratch ---
|
| 123 |
+
|
| 124 |
+
if [[ -n "$SLURM_JOB_ID" ]];
|
| 125 |
+
then
|
| 126 |
+
hdf5s_location="/lustre06/project/6007515/ihec_share/local_ihec_data/epiatlas/hg38/hdf5"
|
| 127 |
+
name="epiatlas_dfreeze_${resolution}_all_none"
|
| 128 |
+
tar_file="${hdf5s_location}/${name}.tar" # IMPORTANT
|
| 129 |
+
|
| 130 |
+
cd "$SLURM_TMPDIR"  # quoted so the path is never word-split or glob-expanded
|
| 131 |
+
|
| 132 |
+
echo "Untaring $tar_file in $SLURM_TMPDIR"
|
| 133 |
+
tar -xf "$tar_file"  # quoted: archive path is passed as one argument; extracts into the current directory
|
| 134 |
+
|
| 135 |
+
export HDF5_PARENT="${name}" # IMPORTANT
|
| 136 |
+
cd "$name"  # quoted so the directory name is never word-split or glob-expanded
|
| 137 |
+
scp "${hdf5s_location}/${name}-4ctl"/* . #extra files in v2.1 (directory quoted; the glob stays outside the quotes so it still expands)
|
| 138 |
+
fi
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
# --- MAIN PROGRAM ---
|
| 142 |
+
|
| 143 |
+
echo "Time before launch: $(date +%F_%T)"
|
| 144 |
+
printf '\n%s\n' "Launching following command"
|
| 145 |
+
if [[ -n "$NO_VALID" ]]; #if variable exists
|
| 146 |
+
then
|
| 147 |
+
# --- no valid launch ---
|
| 148 |
+
if [[ "$log" == *"10fold"* ]]; then
|
| 149 |
+
log="$log/notactually10foldbaka"
|
| 150 |
+
printf '\n%s\n' "Incoherent log path, changing log to $log"
|
| 151 |
+
fi
|
| 152 |
+
|
| 153 |
+
printf '%s\n' "python ${program_path}/epiatlas_training_no_valid.py $category ${hparams} ${hdf5_list} ${chroms} ${metadata} ${log} > ${out1} 2> ${out2}"
|
| 154 |
+
python "${program_path}/epiatlas_training_no_valid.py" "$category" "${hparams}" "${hdf5_list}" "${chroms}" "${metadata}" "${log}" > "${out1}" 2> "${out2}"  # every argument quoted; stdout goes to the .o file, stderr to the .e file
|
| 155 |
+
echo "Time after launch: $(date +%F_%T)"
|
| 156 |
+
exit
|
| 157 |
+
|
| 158 |
+
elif [[ -n "$RESTORE" ]]; then
|
| 159 |
+
# --- kfold launch ---
|
| 160 |
+
printf '%s\n' "python ${program_path}/epiatlas_training.py $category ${hparams} ${hdf5_list} ${chroms} ${metadata} ${log} --restore > ${out1} 2> ${out2}"
|
| 161 |
+
python "${program_path}/epiatlas_training.py" "$category" "${hparams}" "${hdf5_list}" "${chroms}" "${metadata}" "${log}" --restore > "${out1}" 2> "${out2}"  # every argument quoted; stdout goes to the .o file, stderr to the .e file
|
| 162 |
+
exit
|
| 163 |
+
|
| 164 |
+
else
|
| 165 |
+
# --- kfold launch ---
|
| 166 |
+
printf '%s\n' "python ${program_path}/epiatlas_training.py $category ${hparams} ${hdf5_list} ${chroms} ${metadata} ${log} > ${out1} 2> ${out2}"
|
| 167 |
+
python "${program_path}/epiatlas_training.py" "$category" "${hparams}" "${hdf5_list}" "${chroms}" "${metadata}" "${log}" > "${out1}" 2> "${out2}"  # every argument quoted; stdout goes to the .o file, stderr to the .e file
|
| 168 |
+
fi
|
| 169 |
+
echo "Time after launch: $(date +%F_%T)"
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
# --- More logging ---
|
| 174 |
+
set +e
|
| 175 |
+
|
| 176 |
+
if [[ -z "$NO_VALID" ]]; #if variable is empty or unset
|
| 177 |
+
then
|
| 178 |
+
export LOG="${log}"
|
| 179 |
+
export NO_TRUE="False"
|
| 180 |
+
|
| 181 |
+
cd "${log}"  # quoted so the path is never word-split or glob-expanded
|
| 182 |
+
printf '\n%s\n' "Launching following command"
|
| 183 |
+
printf '%s\n' "cat split*/validation_prediction.csv | sort -ru > full-10fold-validation_prediction.csv"
|
| 184 |
+
cat split*/validation_prediction.csv | sort -ru > full-10fold-validation_prediction.csv
|
| 185 |
+
|
| 186 |
+
to_augment="${log}/full-10fold-validation_prediction.csv"
|
| 187 |
+
|
| 188 |
+
printf '\n%s\n' "Launching following command"
|
| 189 |
+
printf '%s\n' "python ${program_path}/utils/augment_predict_file.py ${to_augment} ${metadata} --all-categories"
|
| 190 |
+
python "${program_path}/utils/augment_predict_file.py" "${to_augment}" "${metadata}" --all-categories  # quoted: each path is passed as exactly one argument
|
| 191 |
+
|
| 192 |
+
printf '%s\n' "python ${program_path}/utils/create_confusion_matrices.py --from_prediction ${to_augment}"
|
| 193 |
+
python "${program_path}/utils/create_confusion_matrices.py" --from_prediction "${to_augment}"  # quoted: each path is passed as exactly one argument
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# Copy slurm output file to log dir
|
| 197 |
+
if [[ -n "$SLURM_JOB_ID" ]];
|
| 198 |
+
then
|
| 199 |
+
slurm_out_file="${SLURM_JOB_NAME}-*${SLURM_JOB_ID}.out"
|
| 200 |
+
cp -v "${slurm_out_folder}"/${slurm_out_file} "${log}/"  # slurm_out_file contains a '*' glob, so it is deliberately left unquoted
|
| 201 |
+
fi
|
| 202 |
+
fi
|
complete_no_valid_oversample/output_job21513819_NN-dfreezev2.1-assay_epiclass-complete_no_valid-oversample-100kb_all_none_1695753894.e
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CometLogger will be initialized in online mode
|
| 2 |
+
COMET INFO: Experiment is live on comet.ml https://www.comet.com/rabyj/epilap/0f8e5eb996114868a17057bebe64f87c
|
| 3 |
+
|
| 4 |
+
Using 16bit native Automatic Mixed Precision (AMP)
|
| 5 |
+
GPU available: True, used: True
|
| 6 |
+
TPU available: False, using: 0 TPU cores
|
| 7 |
+
IPU available: False, using: 0 IPUs
|
| 8 |
+
HPU available: False, using: 0 HPUs
|
| 9 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
|
| 10 |
+
COMET INFO: ---------------------------
|
| 11 |
+
COMET INFO: Comet.ml Experiment Summary
|
| 12 |
+
COMET INFO: ---------------------------
|
| 13 |
+
COMET INFO: Data:
|
| 14 |
+
COMET INFO: display_summary_level : 1
|
| 15 |
+
COMET INFO: url : https://www.comet.com/rabyj/epilap/0f8e5eb996114868a17057bebe64f87c
|
| 16 |
+
COMET INFO: Metrics [count] (min, max):
|
| 17 |
+
COMET INFO: train_acc [300] : (0.8985894322395325, 0.9989583492279053)
|
| 18 |
+
COMET INFO: train_loss [300] : (0.010709596797823906, 0.40300047397613525)
|
| 19 |
+
COMET INFO: Others:
|
| 20 |
+
COMET INFO: Code version / commit : v0.7.5.1-7-ga6afe82
|
| 21 |
+
COMET INFO: Experience key : 0f8e5eb996114868a17057bebe64f87c
|
| 22 |
+
COMET INFO: HDF5 Resolution : 100.0kb
|
| 23 |
+
COMET INFO: Initial hdf5 loading time : 0:01:09
|
| 24 |
+
COMET INFO: Name : assay_epiclass_1l_3000n-11c-complete_no_valid_oversample
|
| 25 |
+
COMET INFO: SLURM_JOB_ID : 21513819
|
| 26 |
+
COMET INFO: Total nb of files : 20922
|
| 27 |
+
COMET INFO: category : assay_epiclass
|
| 28 |
+
COMET INFO: test size : 0
|
| 29 |
+
COMET INFO: train size : 46128
|
| 30 |
+
COMET INFO: validation size : 0
|
| 31 |
+
COMET INFO: Parameters:
|
| 32 |
+
COMET INFO: hl_units : 3000
|
| 33 |
+
COMET INFO: hparams/batch_size : 64
|
| 34 |
+
COMET INFO: hparams/is_training : True
|
| 35 |
+
COMET INFO: hparams/keep_prob : 0.5
|
| 36 |
+
COMET INFO: hparams/l2_scale : 0.01
|
| 37 |
+
COMET INFO: hparams/learning_rate : 1e-06
|
| 38 |
+
COMET INFO: hparams/measure_frequency : 1
|
| 39 |
+
COMET INFO: hparams/oversampling : True
|
| 40 |
+
COMET INFO: hparams/training_epochs : 300
|
| 41 |
+
COMET INFO: input_size : 30321
|
| 42 |
+
COMET INFO: mapping/0 : h3k27ac
|
| 43 |
+
COMET INFO: mapping/1 : h3k27me3
|
| 44 |
+
COMET INFO: mapping/10 : wgbs-standard
|
| 45 |
+
COMET INFO: mapping/2 : h3k36me3
|
| 46 |
+
COMET INFO: mapping/3 : h3k4me1
|
| 47 |
+
COMET INFO: mapping/4 : h3k4me3
|
| 48 |
+
COMET INFO: mapping/5 : h3k9me3
|
| 49 |
+
COMET INFO: mapping/6 : input
|
| 50 |
+
COMET INFO: mapping/7 : mrna_seq
|
| 51 |
+
COMET INFO: mapping/8 : rna_seq
|
| 52 |
+
COMET INFO: mapping/9 : wgbs-pbat
|
| 53 |
+
COMET INFO: nb_layer : 1
|
| 54 |
+
COMET INFO: output_size : 11
|
| 55 |
+
COMET INFO: Uploads:
|
| 56 |
+
COMET INFO: asset : 1 (120 bytes)
|
| 57 |
+
COMET INFO: environment details : 1
|
| 58 |
+
COMET INFO: installed packages : 1
|
| 59 |
+
COMET INFO: model graph : 1
|
| 60 |
+
COMET INFO: ---------------------------
|
| 61 |
+
COMET INFO: Uploading metrics, params, and assets to Comet before program termination (may take several seconds)
|
| 62 |
+
COMET INFO: The Python SDK has 3600 seconds to finish before aborting...
|
| 63 |
+
COMET INFO: Uploading 1 metrics, params and output messages
|
| 64 |
+
COMET INFO: Experiment is live on comet.ml https://www.comet.com/rabyj/epilap/0f8e5eb996114868a17057bebe64f87c
|
| 65 |
+
|
| 66 |
+
CometLogger will be initialized in online mode
|
| 67 |
+
COMET INFO: -----------------------------------
|
| 68 |
+
COMET INFO: Comet.ml ExistingExperiment Summary
|
| 69 |
+
COMET INFO: -----------------------------------
|
| 70 |
+
COMET INFO: Data:
|
| 71 |
+
COMET INFO: display_summary_level : 1
|
| 72 |
+
COMET INFO: url : https://www.comet.com/rabyj/epilap/0f8e5eb996114868a17057bebe64f87c
|
| 73 |
+
COMET INFO: Others:
|
| 74 |
+
COMET INFO: Name : assay_epiclass_1l_3000n-11c-complete_no_valid_oversample
|
| 75 |
+
COMET INFO: Uploads:
|
| 76 |
+
COMET INFO: installed packages : 1
|
| 77 |
+
COMET INFO: -----------------------------------
|
| 78 |
+
COMET INFO: Experiment is live on comet.ml https://www.comet.com/rabyj/epilap/0f8e5eb996114868a17057bebe64f87c
|
| 79 |
+
|
| 80 |
+
COMET INFO: -----------------------------------
|
| 81 |
+
COMET INFO: Comet.ml ExistingExperiment Summary
|
| 82 |
+
COMET INFO: -----------------------------------
|
| 83 |
+
COMET INFO: Data:
|
| 84 |
+
COMET INFO: display_summary_level : 1
|
| 85 |
+
COMET INFO: url : https://www.comet.com/rabyj/epilap/0f8e5eb996114868a17057bebe64f87c
|
| 86 |
+
COMET INFO: Metrics:
|
| 87 |
+
COMET INFO: Last epoch : 300
|
| 88 |
+
COMET INFO: Training time : 0:53:53
|
| 89 |
+
COMET INFO: tra_Accuracy : 0.9995014071464539
|
| 90 |
+
COMET INFO: tra_F1Score : 0.9993157982826233
|
| 91 |
+
COMET INFO: tra_MatthewsCorrCoef : 0.9994467496871948
|
| 92 |
+
COMET INFO: tra_Precision : 0.9992875456809998
|
| 93 |
+
COMET INFO: tra_Recall : 0.9993447661399841
|
| 94 |
+
COMET INFO: Uploads:
|
| 95 |
+
COMET INFO: asset : 3 (530.26 KB)
|
| 96 |
+
COMET INFO: installed packages : 1
|
| 97 |
+
COMET INFO: -----------------------------------
|
| 98 |
+
COMET INFO: Uploading 1 metrics, params and output messages
|
| 99 |
+
COMET INFO: Waiting for completion of the file uploads (may take several seconds)
|
| 100 |
+
COMET INFO: The Python SDK has 10800 seconds to finish before aborting...
|
| 101 |
+
COMET INFO: All files uploaded, waiting for confirmation they have been all received
|
complete_no_valid_oversample/output_job21513819_NN-dfreezev2.1-assay_epiclass-complete_no_valid-oversample-100kb_all_none_1695753894.o
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
begin 2023-09-26 18:45:10
|
| 2 |
+
Checking environment variables.
|
| 3 |
+
ASSAY_LIST: ['h3k27ac', 'h3k27me3', 'h3k36me3', 'h3k4me1', 'h3k4me3', 'h3k9me3', 'input', 'rna_seq', 'mrna_seq', 'wgbs-standard', 'wgbs-pbat']
|
| 4 |
+
Filtering metadata: Only keeping examples with targets/assay ['h3k27ac', 'h3k27me3', 'h3k36me3', 'h3k4me1', 'h3k4me3', 'h3k9me3', 'input', 'rna_seq', 'mrna_seq', 'wgbs-standard', 'wgbs-pbat']
|
| 5 |
+
EXCLUDE_LIST: ['other', '--', 'NA', '']
|
| 6 |
+
Filtering metadata: Removing labels ['other', '--', 'NA', ''] from category 'assay_epiclass'.
|
| 7 |
+
No label list, considering all left classes : ['h3k27ac', 'h3k27me3', 'h3k36me3', 'h3k4me1', 'h3k4me3', 'h3k9me3', 'input', 'mrna_seq', 'rna_seq', 'wgbs-pbat', 'wgbs-standard']
|
| 8 |
+
11/11 labels left from assay_epiclass after removing classes with less than 10 signals.
|
| 9 |
+
assay_epiclass label breakdown for unique experiments (uuid):
|
| 10 |
+
h3k27ac: 1563
|
| 11 |
+
rna_seq: 1159
|
| 12 |
+
h3k4me1: 963
|
| 13 |
+
h3k4me3: 799
|
| 14 |
+
input: 777
|
| 15 |
+
h3k36me3: 695
|
| 16 |
+
h3k27me3: 675
|
| 17 |
+
h3k9me3: 642
|
| 18 |
+
wgbs-standard: 442
|
| 19 |
+
mrna_seq: 396
|
| 20 |
+
wgbs-pbat: 130
|
| 21 |
+
For 8241 unique experiments in 11 classes
|
| 22 |
+
|
| 23 |
+
Using files in /localscratch/rabyj.21513819.0/epiatlas_dfreeze_100kb_all_none
|
| 24 |
+
Initial hdf5 loading time: 0:01:09
|
| 25 |
+
The current experiment key is 0f8e5eb996114868a17057bebe64f87c
|
| 26 |
+
The current commit is v0.7.5.1-7-ga6afe82
|
| 27 |
+
Split 0 train size: 46128
|
| 28 |
+
Split 0 validation size: 0
|
| 29 |
+
Split 0 test size: 0
|
| 30 |
+
Total nb of files: 20922
|
| 31 |
+
--MODEL STRUCTURE--
|
| 32 |
+
LightningDenseClassifier(
|
| 33 |
+
(_pt_model): Sequential(
|
| 34 |
+
(0): Dropout(p=0.1, inplace=False)
|
| 35 |
+
(1): Linear(in_features=30321, out_features=3000, bias=True)
|
| 36 |
+
(2): Dropout(p=0.5, inplace=False)
|
| 37 |
+
(3): ReLU()
|
| 38 |
+
(4): Linear(in_features=3000, out_features=11, bias=True)
|
| 39 |
+
)
|
| 40 |
+
(metrics): MetricCollection(
|
| 41 |
+
(Accuracy): Accuracy()
|
| 42 |
+
(Precision): Precision()
|
| 43 |
+
(Recall): Recall()
|
| 44 |
+
(F1Score): F1Score()
|
| 45 |
+
(MatthewsCorrCoef): MatthewsCorrCoef()
|
| 46 |
+
)
|
| 47 |
+
(train_acc): Accuracy()
|
| 48 |
+
(valid_acc): Accuracy()
|
| 49 |
+
)
|
| 50 |
+
--MODEL SUMMARY--
|
| 51 |
+
===================================================================================================================
|
| 52 |
+
Layer (type:depth-idx) Input Shape Output Shape Param #
|
| 53 |
+
===================================================================================================================
|
| 54 |
+
LightningDenseClassifier -- -- --
|
| 55 |
+
├─Sequential: 1-1 [1, 30321] [1, 11] --
|
| 56 |
+
│ └─Dropout: 2-1 [1, 30321] [1, 30321] --
|
| 57 |
+
│ └─Linear: 2-2 [1, 30321] [1, 3000] 90,966,000
|
| 58 |
+
│ └─Dropout: 2-3 [1, 3000] [1, 3000] --
|
| 59 |
+
│ └─ReLU: 2-4 [1, 3000] [1, 3000] --
|
| 60 |
+
│ └─Linear: 2-5 [1, 3000] [1, 11] 33,011
|
| 61 |
+
├─MetricCollection: 1-2 -- -- --
|
| 62 |
+
│ └─Accuracy: 2-6 -- -- --
|
| 63 |
+
│ └─Precision: 2-7 -- -- --
|
| 64 |
+
│ └─Recall: 2-8 -- -- --
|
| 65 |
+
│ └─F1Score: 2-9 -- -- --
|
| 66 |
+
│ └─MatthewsCorrCoef: 2-10 -- -- --
|
| 67 |
+
├─Accuracy: 1-3 -- -- --
|
| 68 |
+
├─Accuracy: 1-4 -- -- --
|
| 69 |
+
===================================================================================================================
|
| 70 |
+
Total params: 90,999,011
|
| 71 |
+
Trainable params: 90,999,011
|
| 72 |
+
Non-trainable params: 0
|
| 73 |
+
Total mult-adds (M): 91.00
|
| 74 |
+
===================================================================================================================
|
| 75 |
+
Input size (MB): 0.12
|
| 76 |
+
Forward/backward pass size (MB): 0.02
|
| 77 |
+
Params size (MB): 364.00
|
| 78 |
+
Estimated Total Size (MB): 364.14
|
| 79 |
+
===================================================================================================================
|
| 80 |
+
--TRAINING HYPERPARAMETERS--
|
| 81 |
+
L2 scale : 0.01
|
| 82 |
+
Dropout rate : 0.5
|
| 83 |
+
Learning rate : 1e-06
|
| 84 |
+
No early stopping.
|
| 85 |
+
Training batch size : 64
|
| 86 |
+
┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓
|
| 87 |
+
┃ ┃ Name ┃ Type ┃ Params ┃
|
| 88 |
+
┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩
|
| 89 |
+
│ 0 │ _pt_model │ Sequential │ 91.0 M │
|
| 90 |
+
│ 1 │ _pt_model.0 │ Dropout │ 0 │
|
| 91 |
+
│ 2 │ _pt_model.1 │ Linear │ 91.0 M │
|
| 92 |
+
│ 3 │ _pt_model.2 │ Dropout │ 0 │
|
| 93 |
+
│ 4 │ _pt_model.3 │ ReLU │ 0 │
|
| 94 |
+
│ 5 │ _pt_model.4 │ Linear │ 33.0 K │
|
| 95 |
+
│ 6 │ metrics │ MetricCollection │ 0 │
|
| 96 |
+
│ 7 │ metrics.Accuracy │ Accuracy │ 0 │
|
| 97 |
+
│ 8 │ metrics.Precision │ Precision │ 0 │
|
| 98 |
+
│ 9 │ metrics.Recall │ Recall │ 0 │
|
| 99 |
+
│ 10 │ metrics.F1Score │ F1Score │ 0 │
|
| 100 |
+
│ 11 │ metrics.MatthewsCorrCoef │ MatthewsCorrCoef │ 0 │
|
| 101 |
+
│ 12 │ train_acc │ Accuracy │ 0 │
|
| 102 |
+
│ 13 │ valid_acc │ Accuracy │ 0 │
|
| 103 |
+
└────┴──────────────────────────┴──────────────────┴────────┘
|
| 104 |
+
Trainable params: 91.0 M
|
| 105 |
+
Non-trainable params: 0
|
| 106 |
+
Total params: 91.0 M
|
| 107 |
+
Total estimated model params size (MB): 181
|
| 108 |
+
Saving model to /lustre06/project/6007017/rabyj/epilap/output/logs/epiatlas-dfreeze-v2.1/hg38_100kb_all_none/assay_epiclass_1l_3000n/11c/complete_no_valid_oversample/EpiLaP/0f8e5eb996114868a17057bebe64f87c/checkpoints/epoch=299-step=300.ckpt
|
| 109 |
+
training time: 0:53:53
|
| 110 |
+
Reading checkpoint list and taking last line.
|
| 111 |
+
Loading model from /lustre06/project/6007017/rabyj/epilap/output/logs/epiatlas-dfreeze-v2.1/hg38_100kb_all_none/assay_epiclass_1l_3000n/11c/complete_no_valid_oversample/EpiLaP/0f8e5eb996114868a17057bebe64f87c/checkpoints/epoch=299-step=300.ckpt
|
| 112 |
+
--- training set METRICS ---
|
| 113 |
+
Accuracy 1.000
|
| 114 |
+
Precision 0.999
|
| 115 |
+
Recall 0.999
|
| 116 |
+
F1Score 0.999
|
| 117 |
+
MatthewsCorrCoef 0.999
|
| 118 |
+
1.000 0.999 0.999 0.999 0.999
|
complete_no_valid_oversample/split0_training_2023-09-26_18-46-35.md5
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
complete_no_valid_oversample/train_confusion_matrix.csv
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,h3k27ac,h3k27me3,h3k36me3,h3k4me1,h3k4me3,h3k9me3,input,mrna_seq,rna_seq,wgbs-pbat,wgbs-standard
|
| 2 |
+
h3k27ac(5544),5543,0,0,0,0,0,1,0,0,0,0
|
| 3 |
+
h3k27me3(5175),0,5173,0,0,0,0,2,0,0,0,0
|
| 4 |
+
h3k36me3(5376),0,0,5376,0,0,0,0,0,0,0,0
|
| 5 |
+
h3k4me1(4905),0,0,0,4905,0,0,0,0,0,0,0
|
| 6 |
+
h3k4me3(5373),0,2,0,0,5371,0,0,0,0,0,0
|
| 7 |
+
h3k9me3(5457),0,0,0,0,0,5457,0,0,0,0,0
|
| 8 |
+
input(2583),0,2,0,0,0,0,2581,0,0,0,0
|
| 9 |
+
mrna_seq(2612),0,0,0,0,0,0,0,2608,4,0,0
|
| 10 |
+
rna_seq(3027),0,0,0,0,0,0,0,12,3015,0,0
|
| 11 |
+
wgbs-pbat(3038),0,0,0,0,0,0,0,0,0,3038,0
|
| 12 |
+
wgbs-standard(3038),0,0,0,0,0,0,0,0,0,0,3038
|
complete_no_valid_oversample/train_confusion_matrix.png
ADDED
|
Git LFS Details
|
complete_no_valid_oversample/train_confusion_matrix_relative.csv
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,h3k27ac,h3k27me3,h3k36me3,h3k4me1,h3k4me3,h3k9me3,input,mrna_seq,rna_seq,wgbs-pbat,wgbs-standard
|
| 2 |
+
h3k27ac(5544),0.9998,0.0000,0.0000,0.0000,0.0000,0.0000,0.0002,0.0000,0.0000,0.0000,0.0000
|
| 3 |
+
h3k27me3(5175),0.0000,0.9996,0.0000,0.0000,0.0000,0.0000,0.0004,0.0000,0.0000,0.0000,0.0000
|
| 4 |
+
h3k36me3(5376),0.0000,0.0000,1.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
|
| 5 |
+
h3k4me1(4905),0.0000,0.0000,0.0000,1.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
|
| 6 |
+
h3k4me3(5373),0.0000,0.0004,0.0000,0.0000,0.9996,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
|
| 7 |
+
h3k9me3(5457),0.0000,0.0000,0.0000,0.0000,0.0000,1.0000,0.0000,0.0000,0.0000,0.0000,0.0000
|
| 8 |
+
input(2583),0.0000,0.0008,0.0000,0.0000,0.0000,0.0000,0.9992,0.0000,0.0000,0.0000,0.0000
|
| 9 |
+
mrna_seq(2612),0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.9985,0.0015,0.0000,0.0000
|
| 10 |
+
rna_seq(3027),0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0040,0.9960,0.0000,0.0000
|
| 11 |
+
wgbs-pbat(3038),0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,1.0000,0.0000
|
| 12 |
+
wgbs-standard(3038),0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,1.0000
|
complete_no_valid_oversample/training_mapping.tsv
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0 h3k27ac
|
| 2 |
+
1 h3k27me3
|
| 3 |
+
2 h3k36me3
|
| 4 |
+
3 h3k4me1
|
| 5 |
+
4 h3k4me3
|
| 6 |
+
5 h3k9me3
|
| 7 |
+
6 input
|
| 8 |
+
7 mrna_seq
|
| 9 |
+
8 rna_seq
|
| 10 |
+
9 wgbs-pbat
|
| 11 |
+
10 wgbs-standard
|