eachanjohnson committed on Apr 1, 2025

Commit

20b47ac

verified ·

1 Parent(s): bbec84a

Upload folder using huggingface_hub

Browse files

Files changed (24) hide show

README.md +19 -19
data-load-args.json +1 -1
eval-metrics_test.json +3 -3
eval-metrics_train.json +3 -3
eval-metrics_validation.json +3 -3
logs-csv/lightning_logs/version_0/hparams.yaml +1 -13
logs-csv/lightning_logs/version_0/metrics.csv +0 -0
logs/lightning_logs/version_0/events.out.tfevents.1743276990.cn075.1752123.0 +3 -0
logs/lightning_logs/version_0/hparams.yaml +1 -13
metrics.csv +3 -3
modelbox-config.json +5 -5
params.pt +2 -2
predictions_test.csv.gz +2 -2
predictions_test.png +0 -0
predictions_train.csv.gz +2 -2
predictions_train.png +0 -0
predictions_validation.csv.gz +2 -2
predictions_validation.png +0 -0
training-data.hf/cache-be42d5605d12b0e6.arrow +3 -0
training-data.hf/data-00000-of-00001.arrow +2 -2
training-data.hf/dataset_info.json +78 -4
training-data.hf/state.json +1 -1
training-log.csv +0 -0
training-log.png +0 -0

README.md CHANGED Viewed

@@ -12,7 +12,7 @@ datasets:
 # Predictor of _Yersinia pestis_ MICs
-_Updated:_ cd ..Tue Apr  1 03:31:50 BST 2025
 Trained on the _Yersinia pestis_, WT accumulator phenotype subset of the [human-curated SPARK dataset](https://doi.org/10.1021/acsinfecdis.8b00193) (10002 rows in total for _Yersinia pestis_).
@@ -26,7 +26,7 @@ Duvida also saves the training data in this checkpoint to allow the calculation
 based on that training data.
 This model is the best regression model from a hyperparameter search, determined
-by Spearman's $\rho$ on a held-out test set not used in training or early stopping.
 ### Model architecture
@@ -35,13 +35,13 @@ by Spearman's $\rho$ on a held-out test set not used in training or early stoppi
 ```json
 {
-    "dropout": 0.2,
     "ensemble_size": 3,
     "extra_featurizers": null,
-    "learning_rate": 1e-05,
-    "model_class": "FPMLPModelBox",
-    "n_hidden": 5,
-    "n_units": 64,
     "use_2d": true,
     "use_fp": true
 }
@@ -79,39 +79,39 @@ Train (7002 rows):
 ```json
 {
-    "Pearson r": 0.8172829331255921,
-    "RMSE": 0.031726885586977005,
-    "Spearman rho": 0.9709886269987218
 }
 ```
-<img src="predictions-train.png" width=450>
 Validation (1499 rows):
 ```json
 {
-    "Pearson r": 0.722565134551274,
-    "RMSE": 0.038165923207998276,
-    "Spearman rho": 0.9247149302918276
 }
 ```
-<img src="predictions-validation.png" width=450>
 Test (1501 rows):
 ```json
 {
-    "Pearson r": 0.7763133307532436,
-    "RMSE": 0.035939376801252365,
-    "Spearman rho": 0.9701919932392147
 }
 ```
-<img src="predictions-test.png" width=450>
 ## Training data details

 # Predictor of _Yersinia pestis_ MICs
+_Updated:_ Tue  1 Apr 08:03:01 BST 2025
 Trained on the _Yersinia pestis_, WT accumulator phenotype subset of the [human-curated SPARK dataset](https://doi.org/10.1021/acsinfecdis.8b00193) (10002 rows in total for _Yersinia pestis_).
 based on that training data.
 This model is the best regression model from a hyperparameter search, determined
+by Pearson's $r$ on a held-out test set not used in training or early stopping.
 ### Model architecture
 ```json
 {
+    "dropout": 0.0,
     "ensemble_size": 3,
     "extra_featurizers": null,
+    "learning_rate": 0.0001,
+    "model_class": "ChempropModelBox",
+    "n_hidden": 1,
+    "n_units": 256,
     "use_2d": true,
     "use_fp": true
 }
 ```json
 {
+    "Pearson r": 0.9997487398096949,
+    "RMSE": 0.001503719948232174,
+    "Spearman rho": 0.9995015503182805
 }
 ```
+<img src="predictions_train.png" width=450>
 Validation (1499 rows):
 ```json
 {
+    "Pearson r": 0.7851248408551393,
+    "RMSE": 0.019412975758314133,
+    "Spearman rho": 0.934205549109032
 }
 ```
+<img src="predictions_validation.png" width=450>
 Test (1501 rows):
 ```json
 {
+    "Pearson r": 0.7895561902856947,
+    "RMSE": 0.016430044546723366,
+    "Spearman rho": 0.9670287095583492
 }
 ```
+<img src="predictions_test.png" width=450>
 ## Training data details

data-load-args.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-    "cache": "/nemo/lab/johnsone/home/users/johnsoe/projects/abx-discovery-strategy/models/spark-dv-2503/Yersinia-pestis/117/cache",
     "features": [
         "smiles"
     ],

 {
+    "cache": "/nemo/lab/johnsone/home/users/johnsoe/projects/abx-discovery-strategy/models/spark-dv-2503/Yersinia-pestis/79/cache",
     "features": [
         "smiles"
     ],

eval-metrics_test.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-    "Pearson r": 0.7763133307532436,
-    "RMSE": 0.035939376801252365,
-    "Spearman rho": 0.9701919932392147
 }

 {
+    "Pearson r": 0.7895561902856947,
+    "RMSE": 0.016430044546723366,
+    "Spearman rho": 0.9670287095583492
 }

eval-metrics_train.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-    "Pearson r": 0.8172829331255921,
-    "RMSE": 0.031726885586977005,
-    "Spearman rho": 0.9709886269987218
 }

 {
+    "Pearson r": 0.9997487398096949,
+    "RMSE": 0.001503719948232174,
+    "Spearman rho": 0.9995015503182805
 }

eval-metrics_validation.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-    "Pearson r": 0.722565134551274,
-    "RMSE": 0.038165923207998276,
-    "Spearman rho": 0.9247149302918276
 }

 {
+    "Pearson r": 0.7851248408551393,
+    "RMSE": 0.019412975758314133,
+    "Spearman rho": 0.934205549109032
 }

logs-csv/lightning_logs/version_0/hparams.yaml CHANGED Viewed

@@ -1,13 +1 @@
-dropout: 0.2
-ensemble_size: 3
-extra_featurizers: null
-learning_rate: 1.0e-05
-n_hidden: 5
-n_input: 2248
-n_out: 1
-n_units: 64
-optimizer: !!python/name:torch.optim.adam.Adam ''
-reduce_lr_on_plateau: true
-reduce_lr_patience: 10
-use_2d: true
-use_fp: true


1	+ {}

logs-csv/lightning_logs/version_0/metrics.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

logs/lightning_logs/version_0/events.out.tfevents.1743276990.cn075.1752123.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f566aa633d483ff740936c359ce37e185ed3664969e3b4f2d28e44dd63021506
+size 531562

logs/lightning_logs/version_0/hparams.yaml CHANGED Viewed

@@ -1,13 +1 @@
-dropout: 0.2
-ensemble_size: 3
-extra_featurizers: null
-learning_rate: 1.0e-05
-n_hidden: 5
-n_input: 2248
-n_out: 1
-n_units: 64
-optimizer: !!python/name:torch.optim.adam.Adam ''
-reduce_lr_on_plateau: true
-reduce_lr_patience: 10
-use_2d: true
-use_fp: true


1	+ {}

metrics.csv CHANGED Viewed

@@ -1,4 +1,4 @@
 split,split_filename,config_i,model_class,n_parameters,filename,features,labels,cache,extra_featurizers,use_2d,use_fp,dropout,ensemble_size,learning_rate,n_hidden,n_units,val_filename,epochs,batch_size,RMSE,Pearson r,Spearman rho
-train,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-train.csv.gz,117,FPMLPModelBox,481923,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-train.csv.gz,['smiles'],['pmic'],/nemo/lab/johnsone/home/users/johnsoe/projects/abx-discovery-strategy/models/spark-dv-2503/Yersinia-pestis/117/cache,,True,True,0.2,3,1e-05,5,64,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-validation.csv.gz,10000,16,0.031726885586977005,0.8172829331255921,0.9709886269987218
-validation,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-validation.csv.gz,117,FPMLPModelBox,481923,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-train.csv.gz,['smiles'],['pmic'],/nemo/lab/johnsone/home/users/johnsoe/projects/abx-discovery-strategy/models/spark-dv-2503/Yersinia-pestis/117/cache,,True,True,0.2,3,1e-05,5,64,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-validation.csv.gz,10000,16,0.038165923207998276,0.722565134551274,0.9247149302918276
-test,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-test.csv.gz,117,FPMLPModelBox,481923,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-train.csv.gz,['smiles'],['pmic'],/nemo/lab/johnsone/home/users/johnsoe/projects/abx-discovery-strategy/models/spark-dv-2503/Yersinia-pestis/117/cache,,True,True,0.2,3,1e-05,5,64,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-validation.csv.gz,10000,16,0.035939376801252365,0.7763133307532436,0.9701919932392147

 split,split_filename,config_i,model_class,n_parameters,filename,features,labels,cache,extra_featurizers,use_2d,use_fp,dropout,ensemble_size,learning_rate,n_hidden,n_units,val_filename,epochs,batch_size,RMSE,Pearson r,Spearman rho
+train,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-train.csv.gz,79,ChempropModelBox,2641503,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-train.csv.gz,['smiles'],['pmic'],/nemo/lab/johnsone/home/users/johnsoe/projects/abx-discovery-strategy/models/spark-dv-2503/Yersinia-pestis/79/cache,,True,True,0.0,3,0.0001,1,256,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-validation.csv.gz,10000,16,0.001503719948232174,0.9997487398096949,0.9995015503182805
+validation,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-validation.csv.gz,79,ChempropModelBox,2641503,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-train.csv.gz,['smiles'],['pmic'],/nemo/lab/johnsone/home/users/johnsoe/projects/abx-discovery-strategy/models/spark-dv-2503/Yersinia-pestis/79/cache,,True,True,0.0,3,0.0001,1,256,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-validation.csv.gz,10000,16,0.019412975758314133,0.7851248408551393,0.934205549109032
+test,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-test.csv.gz,79,ChempropModelBox,2641503,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-train.csv.gz,['smiles'],['pmic'],/nemo/lab/johnsone/home/users/johnsoe/projects/abx-discovery-strategy/models/spark-dv-2503/Yersinia-pestis/79/cache,,True,True,0.0,3,0.0001,1,256,/nemo/lab/johnsone/home/users/johnsoe/data/datasets/thomas-2018-spark-wt/Yersinia-pestis/scaffold-split-validation.csv.gz,10000,16,0.016430044546723366,0.7895561902856947,0.9670287095583492

modelbox-config.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
-    "dropout": 0.2,
     "ensemble_size": 3,
     "extra_featurizers": null,
-    "learning_rate": 1e-05,
-    "model_class": "FPMLPModelBox",
-    "n_hidden": 5,
-    "n_units": 64,
     "use_2d": true,
     "use_fp": true
 }

 {
+    "dropout": 0.0,
     "ensemble_size": 3,
     "extra_featurizers": null,
+    "learning_rate": 0.0001,
+    "model_class": "ChempropModelBox",
+    "n_hidden": 1,
+    "n_units": 256,
     "use_2d": true,
     "use_fp": true
 }

params.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee16fefc5cc589f4e71428f11f98164cd55805c9fd9d5e06a283979b46465670
-size 1947266

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ce837e25980d811d35ae69dfa284bb6e0e0781aa2cfc61db0d304898cc0bc40
+size 10591438

predictions_test.csv.gz CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7c727a4bf844df723fd5f3795e18cad06b5d022e79916020fa624228c572f38
-size 37895

 version https://git-lfs.github.com/spec/v1
+oid sha256:be3f965a781921dc8e800478db924ed2d0611171cae0cbcb0afe91bd755e9a7f
+size 1496218

predictions_test.png CHANGED Viewed

predictions_train.csv.gz CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c0ed7b3d692d0061c30bff80b6c9f01403b123bc440a0fd7d37b8604e7fc51b
-size 147876

 version https://git-lfs.github.com/spec/v1
+oid sha256:d941f5a5e6811308e8b132b3d134cea1b35f9569048d3507f0ebefe8be659919
+size 6112338

predictions_train.png CHANGED Viewed

predictions_validation.csv.gz CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e49d722bbcc0f7971ca99fa787d558c669c97378ff5bb4d8d2ab41343726cd6
-size 36285

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1daae2036610439d9ea825c67629c9499a1fa75ec049ff8d212bb3513a760c1
+size 1428218

predictions_validation.png CHANGED Viewed

training-data.hf/cache-be42d5605d12b0e6.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e3969c11dc8a66b325abfc4677833ab3fb0f19d5df6f159e6f38be7f7204b71
+size 196560040

training-data.hf/data-00000-of-00001.arrow CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db76da8bf189fcd4b2421108c5534d504b8d09606b4361f62d45689219923beb
-size 126338936

 version https://git-lfs.github.com/spec/v1
+oid sha256:249c80ca1a8facf29ab36711a3baf6d535684f98faa309662d225d6de99992d9
+size 195855616

training-data.hf/dataset_info.json CHANGED Viewed

@@ -14,19 +14,93 @@
   "download_size": 559702,
   "features": {
     "smiles": {
-      "dtype": "string",
-      "_type": "Value"
     },
     "inputs": {
       "feature": {
         "dtype": "float64",
         "_type": "Value"
       },
       "_type": "Sequence"
     },
-    "labels": {
       "feature": {
-        "dtype": "float64",
         "_type": "Value"
       },
       "_type": "Sequence"

   "download_size": 559702,
   "features": {
     "smiles": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
     },
     "inputs": {
+      "V_d": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "gt_mask": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "lt_mask": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "mg": {
+        "E": {
+          "feature": {
+            "feature": {
+              "dtype": "float32",
+              "_type": "Value"
+            },
+            "_type": "Sequence"
+          },
+          "_type": "Sequence"
+        },
+        "V": {
+          "feature": {
+            "feature": {
+              "dtype": "float32",
+              "_type": "Value"
+            },
+            "_type": "Sequence"
+          },
+          "_type": "Sequence"
+        },
+        "edge_index": {
+          "feature": {
+            "feature": {
+              "dtype": "float32",
+              "_type": "Value"
+            },
+            "_type": "Sequence"
+          },
+          "_type": "Sequence"
+        },
+        "rev_edge_index": {
+          "feature": {
+            "dtype": "float32",
+            "_type": "Value"
+          },
+          "_type": "Sequence"
+        }
+      },
+      "weight": {
+        "dtype": "float32",
+        "_type": "Value"
+      },
+      "x_d": {
+        "feature": {
+          "dtype": "float32",
+          "_type": "Value"
+        },
+        "_type": "Sequence"
+      },
+      "y": {
+        "feature": {
+          "dtype": "float32",
+          "_type": "Value"
+        },
+        "_type": "Sequence"
+      }
+    },
+    "labels": {
       "feature": {
         "dtype": "float64",
         "_type": "Value"
       },
       "_type": "Sequence"
     },
+    "extra_features": {
       "feature": {
+        "dtype": "float32",
         "_type": "Value"
       },
       "_type": "Sequence"

training-data.hf/state.json CHANGED Viewed

@@ -4,7 +4,7 @@
       "filename": "data-00000-of-00001.arrow"
     }
   ],
-  "_fingerprint": "187fee60d2df35f6",
   "_format_columns": null,
   "_format_kwargs": {
     "dtype": "float"

       "filename": "data-00000-of-00001.arrow"
     }
   ],
+  "_fingerprint": "0f7055de735ed62e",
   "_format_columns": null,
   "_format_kwargs": {
     "dtype": "float"

training-log.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

training-log.png CHANGED Viewed