diff --git a/training_data/hemo-negative.npz b/training_data/hemo-negative.npz index 37eac85dba8ee35d792caf5e71157166188ea6c4..ab11bf7dd8957596d82f583b291a1f94c924adce 100644 --- a/training_data/hemo-negative.npz +++ b/training_data/hemo-negative.npz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f83aad41f160deb6401bc0801bddc931488da6e1785749e6f72de6d0f154a37f -size 109451 +oid sha256:bcf254803d7f5a809153007989de42b9ca9030c3a45b1f4048f40b289d010012 +size 11385064 diff --git a/training_data/hemo-positive.npz b/training_data/hemo-positive.npz index 48c4df6ed11eef1cafb0ac10f7dd26e6256d94e0..287ec76b48b5b228d502beb96c096e943ec60a95 100644 --- a/training_data/hemo-positive.npz +++ b/training_data/hemo-positive.npz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96cb24d5a7617f7e211cd48d2b0b424a46affa95716b96058058902068068d27 -size 27840 +oid sha256:c4dd8a83ebf887e285bd5d10e3cee919452d8ddf97463e02f52ee51789aebb41 +size 2775784 diff --git a/training_data/nf-negative.npz b/training_data/nf-negative.npz index 5cda7af5e41bbfedbbac0eeb3fd25b72f69e509d..8fa50477c360227708a42c2bc355003258ae7974 100644 --- a/training_data/nf-negative.npz +++ b/training_data/nf-negative.npz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e506e52e101308dd3882ca6bd45833a6e0837f9f240aa85d575c2a41e305b854 -size 21845190 +oid sha256:697db57ca3cf2366caabc000b69019e3b84fe88fea5da98de3fd75b3c9920aeb +size 21736264 diff --git a/training_data/nf-positive.npz b/training_data/nf-positive.npz index 85b64212de7042581621d0cdd6f16335df8eb54a..cbbb9240f4296497e28c7ea1da773dc7867921d0 100644 --- a/training_data/nf-positive.npz +++ b/training_data/nf-positive.npz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78caae183fe840b145275d9486a3f94a963989deb9d55a57995653bf1d497bf2 -size 41326 +oid sha256:2d49dea7969a0e408dfede599f165746bb45c83dc5bac1cc1a7d14e32de13406 +size 5760264 diff --git a/training_data_cleaned/hemolysis/hemo_meta_with_split.csv b/training_data_cleaned/hemolysis/hemo_meta_with_split.csv new file mode 100644 index 0000000000000000000000000000000000000000..2ef1aac90abd02b1e0b2744569bfbafde4965a74 --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_meta_with_split.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0326835d831524088e84ab86b1555cac366219fb5982fca7ac9ddddfc43b1b0 +size 233220 diff --git a/training_data_cleaned/hemolysis/hemo_smiles_meta_with_split.csv b/training_data_cleaned/hemolysis/hemo_smiles_meta_with_split.csv new file mode 100644 index 0000000000000000000000000000000000000000..a238bb0dbf69175c4b5454875f91d2bed18648ad --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_smiles_meta_with_split.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e521d4f4344bebdce1b5aa57f9e7fb1c6dc848319cb980baec38574573f079f +size 4726077 diff --git a/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/dataset_dict.json b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/dataset_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..eda4a18f1db7243cbbde261db68cf05ae54dae74 --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/dataset_dict.json @@ -0,0 +1 @@ +{"splits": ["train", "val"]} \ No newline at end of file diff --git a/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/data-00000-of-00001.arrow b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e8b754a884498fb161ba5373c0cbcd52feb79e4b --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1af97dd02933072c0522101cb6c382703093fcdeee3185e509c6edd6fc070b8a +size 16876472 diff --git a/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/dataset_info.json b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/state.json b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/state.json new file mode 100644 index 0000000000000000000000000000000000000000..6beac364a8eca17bd45678af8505350ddb5c25ed --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "48acc3da44ca47b8", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/data-00000-of-00001.arrow b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..185a1471e5832c5d3fe0a734d602c48fbff761b9 --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40cb576c6993d53b26152f4cd954ba372c2b7817811ee4da9513036e9d2cc573 +size 4157120 diff --git a/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/dataset_info.json b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/state.json b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/state.json new file mode 100644 index 0000000000000000000000000000000000000000..932725dad0f4e809bc99396cea52f603167d9cdb --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "fd2db53d34e0b66a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/dataset_dict.json b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/dataset_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..eda4a18f1db7243cbbde261db68cf05ae54dae74 --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/dataset_dict.json @@ -0,0 +1 @@ +{"splits": ["train", "val"]} \ No newline at end of file diff --git a/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/train/data-00000-of-00001.arrow b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/train/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..037847164e0782f3b3fa8cd6da2ad67b189e324a --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/train/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b790e2b015c19f7e0af6b8c412543677d3cdb792591952c98717f5a38eb62fc +size 25054912 diff --git a/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/train/dataset_info.json b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/train/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/train/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/train/state.json b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/train/state.json new file mode 100644 index 0000000000000000000000000000000000000000..3d5346bae2aaab83391c2ef1d33546fb59950486 --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/train/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e02b995aa75a9a40", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/val/data-00000-of-00001.arrow b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/val/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..2a84a4b1b1776a1eac599361fbeaa51a98c6ae00 --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/val/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29ace9ae3f6248785f08e15fff28be45e2d4832aaf929008470b055d60b3a523 +size 6268920 diff --git a/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/val/dataset_info.json b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/val/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/val/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/val/state.json b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/val/state.json new file mode 100644 index 0000000000000000000000000000000000000000..ef986403e8b8db21e99972c3d114fc38542dec1a --- /dev/null +++ b/training_data_cleaned/hemolysis/hemo_wt_with_embeddings/val/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b7d24c190523afa3", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_meta_with_split.csv b/training_data_cleaned/nf/nf_meta_with_split.csv new file mode 100644 index 0000000000000000000000000000000000000000..f5998324773a84fe2140d1d66d32062e93378f11 --- /dev/null +++ b/training_data_cleaned/nf/nf_meta_with_split.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcce644701612db54d5f3505dce201c351234837e3685739b2fde68d53c8cf5e +size 1756049 diff --git a/training_data_cleaned/nf/nf_smiles_meta_with_split.csv b/training_data_cleaned/nf/nf_smiles_meta_with_split.csv new file mode 100644 index 0000000000000000000000000000000000000000..c73a0c9da4a733889c4f525e041749d666e98e4a --- /dev/null +++ b/training_data_cleaned/nf/nf_smiles_meta_with_split.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e36214670d615dca1a48df6500c203707cdbf31c88261bf719f7d7c1eabc201c +size 57456921 diff --git a/training_data_cleaned/nf/nf_smiles_with_embeddings/dataset_dict.json b/training_data_cleaned/nf/nf_smiles_with_embeddings/dataset_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..eda4a18f1db7243cbbde261db68cf05ae54dae74 --- /dev/null +++ b/training_data_cleaned/nf/nf_smiles_with_embeddings/dataset_dict.json @@ -0,0 +1 @@ +{"splits": ["train", "val"]} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_smiles_with_embeddings/train/data-00000-of-00001.arrow b/training_data_cleaned/nf/nf_smiles_with_embeddings/train/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..dbc1f5c4a995bd2ef9ee1b052c2c51d1332b7d0b --- /dev/null +++ b/training_data_cleaned/nf/nf_smiles_with_embeddings/train/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d682c81cf7d3c0ce5f1f11ebf2b097d7f100f2d2a78a8ad8a40b1f38b94cc5a +size 23581248 diff --git a/training_data_cleaned/nf/nf_smiles_with_embeddings/train/dataset_info.json b/training_data_cleaned/nf/nf_smiles_with_embeddings/train/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/nf/nf_smiles_with_embeddings/train/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_smiles_with_embeddings/train/state.json b/training_data_cleaned/nf/nf_smiles_with_embeddings/train/state.json new file mode 100644 index 0000000000000000000000000000000000000000..b1fbe3485974ee37405b23433f870451ee82c384 --- /dev/null +++ b/training_data_cleaned/nf/nf_smiles_with_embeddings/train/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "fb6f5e1e2e124220", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_smiles_with_embeddings/val/data-00000-of-00001.arrow b/training_data_cleaned/nf/nf_smiles_with_embeddings/val/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..de2a336a5f9fb4e27a595ff5059f287768f7ece5 --- /dev/null +++ b/training_data_cleaned/nf/nf_smiles_with_embeddings/val/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5724627360561f961366f3c57b8de1ed0f6187bbf670bd3a4254da1d0ba571 +size 57618824 diff --git a/training_data_cleaned/nf/nf_smiles_with_embeddings/val/dataset_info.json b/training_data_cleaned/nf/nf_smiles_with_embeddings/val/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/nf/nf_smiles_with_embeddings/val/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_smiles_with_embeddings/val/state.json b/training_data_cleaned/nf/nf_smiles_with_embeddings/val/state.json new file mode 100644 index 0000000000000000000000000000000000000000..ef50afcca563c66a099c0edb43bb31563b397f4e --- /dev/null +++ b/training_data_cleaned/nf/nf_smiles_with_embeddings/val/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "51b64b0e80ee5ffd", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_wt_with_embeddings/dataset_dict.json b/training_data_cleaned/nf/nf_wt_with_embeddings/dataset_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..eda4a18f1db7243cbbde261db68cf05ae54dae74 --- /dev/null +++ b/training_data_cleaned/nf/nf_wt_with_embeddings/dataset_dict.json @@ -0,0 +1 @@ +{"splits": ["train", "val"]} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_wt_with_embeddings/train/data-00000-of-00001.arrow b/training_data_cleaned/nf/nf_wt_with_embeddings/train/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f7c83929acb35a7c40d150f417eae807a7a6dabd --- /dev/null +++ b/training_data_cleaned/nf/nf_wt_with_embeddings/train/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f8f13aa3f2a7dadafc529956ed192be52d9aa20e592ae9f99ee298f6f22748f +size 71732104 diff --git a/training_data_cleaned/nf/nf_wt_with_embeddings/train/dataset_info.json b/training_data_cleaned/nf/nf_wt_with_embeddings/train/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/nf/nf_wt_with_embeddings/train/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_wt_with_embeddings/train/state.json b/training_data_cleaned/nf/nf_wt_with_embeddings/train/state.json new file mode 100644 index 0000000000000000000000000000000000000000..6101c208c233c64e95405713907965e7911de24f --- /dev/null +++ b/training_data_cleaned/nf/nf_wt_with_embeddings/train/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7e9e61eb2e38bf25", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_wt_with_embeddings/val/data-00000-of-00001.arrow b/training_data_cleaned/nf/nf_wt_with_embeddings/val/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..98b574e378f33d7dff6336dcdaebd76ee01d8fd7 --- /dev/null +++ b/training_data_cleaned/nf/nf_wt_with_embeddings/val/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480c6294956397a18619267bc07d880e29e96532fe9a9618a052dec3969b46cc +size 17930608 diff --git a/training_data_cleaned/nf/nf_wt_with_embeddings/val/dataset_info.json b/training_data_cleaned/nf/nf_wt_with_embeddings/val/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/nf/nf_wt_with_embeddings/val/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/nf/nf_wt_with_embeddings/val/state.json b/training_data_cleaned/nf/nf_wt_with_embeddings/val/state.json new file mode 100644 index 0000000000000000000000000000000000000000..3122025540619145b8798a7c3653fdb364183e85 --- /dev/null +++ b/training_data_cleaned/nf/nf_wt_with_embeddings/val/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "52fcbc4e0da87fa3", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/permeability_caco2/caco2_meta_with_split.csv b/training_data_cleaned/permeability_caco2/caco2_meta_with_split.csv new file mode 100644 index 0000000000000000000000000000000000000000..143791dc98561c9bce408089b72330fffab6cea7 --- /dev/null +++ b/training_data_cleaned/permeability_caco2/caco2_meta_with_split.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8f0fb32da50e69eafd3e585d68c7876710951fc54c20dd85c2501745dbb38c +size 233334 diff --git a/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/dataset_dict.json b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/dataset_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..eda4a18f1db7243cbbde261db68cf05ae54dae74 --- /dev/null +++ b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/dataset_dict.json @@ -0,0 +1 @@ +{"splits": ["train", "val"]} \ No newline at end of file diff --git a/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/data-00000-of-00001.arrow b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..6ec6af6c7c53e5412634388fbcc1a8e9fe95a97c --- /dev/null +++ b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1ee6d2739e733de4465bc3b3229614c02efd9397c10178b2aac1509a9878b68 +size 1592344 diff --git a/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/dataset_info.json b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..8dda6fb9a59209053d809c5c6d31003dd524f076 --- /dev/null +++ b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "float64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/state.json b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/state.json new file mode 100644 index 0000000000000000000000000000000000000000..269e3dff3362f68f4bb87865311fe8df66816552 --- /dev/null +++ b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b67b8e734ab59271", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/data-00000-of-00001.arrow b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e92d555bb3a254652ccf6c8ee39db895169a50c5 --- /dev/null +++ b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e534dfe6744d9fbb89ecea63e54aeac9fcd319dd018f0c7939dfad856eb9599d +size 392528 diff --git a/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/dataset_info.json b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..8dda6fb9a59209053d809c5c6d31003dd524f076 --- /dev/null +++ b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "float64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/state.json b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/state.json new file mode 100644 index 0000000000000000000000000000000000000000..c432ac0f9ae055ad919eaae1c264d81e02a68ab7 --- /dev/null +++ b/training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c5abdd50b2a6a84c", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/permeability_pampa/pampa_meta_with_split.csv b/training_data_cleaned/permeability_pampa/pampa_meta_with_split.csv new file mode 100644 index 0000000000000000000000000000000000000000..f2195b98d28b57e61360600e625a223118798544 --- /dev/null +++ b/training_data_cleaned/permeability_pampa/pampa_meta_with_split.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d04d3767f03a4846003f404db6d03f8392ef9ad73830546064769beded3cfa80 +size 2180806 diff --git a/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/dataset_dict.json b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/dataset_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..eda4a18f1db7243cbbde261db68cf05ae54dae74 --- /dev/null +++ b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/dataset_dict.json @@ -0,0 +1 @@ +{"splits": ["train", "val"]} \ No newline at end of file diff --git a/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/data-00000-of-00001.arrow b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0eecb0606fb050facdc5dce23d77b2b1c8da83bb --- /dev/null +++ b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac8d9409bc3e1f8ea60bf13881e59b431b494093d8cb211ecad75d9940ee9957 +size 16838472 diff --git a/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/dataset_info.json b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..8dda6fb9a59209053d809c5c6d31003dd524f076 --- /dev/null +++ b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "float64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/state.json b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/state.json new file mode 100644 index 0000000000000000000000000000000000000000..97112e0af0e587f25e8db709341d4738e31ec573 --- /dev/null +++ b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b3fc67db512e6dff", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/data-00000-of-00001.arrow b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0460ea1b5978f1823de87aa3c3011b2f53d7d585 --- /dev/null +++ b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93ce6ce347c39cc97353985efbd9d57a18fa61c0cec0c258293008d06d04a1fe +size 5412880 diff --git a/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/dataset_info.json b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..8dda6fb9a59209053d809c5c6d31003dd524f076 --- /dev/null +++ b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "float64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/state.json b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/state.json new file mode 100644 index 0000000000000000000000000000000000000000..891a4598c88c5d2a2d3d011db84f43720724de1d --- /dev/null +++ b/training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "10810b5ed6df45a9", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/solubility/sol_meta_with_split.csv b/training_data_cleaned/solubility/sol_meta_with_split.csv new file mode 100644 index 0000000000000000000000000000000000000000..443d94951ed5ae87385aae8fe231d7049b87c0bf --- /dev/null +++ b/training_data_cleaned/solubility/sol_meta_with_split.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc9621a30eff95c3345e825557437befbb021d58bd608d71a6e47fdb03ccdcc3 +size 2928410 diff --git a/training_data_cleaned/solubility/sol_wt_with_embeddings/dataset_dict.json b/training_data_cleaned/solubility/sol_wt_with_embeddings/dataset_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..eda4a18f1db7243cbbde261db68cf05ae54dae74 --- /dev/null +++ b/training_data_cleaned/solubility/sol_wt_with_embeddings/dataset_dict.json @@ -0,0 +1 @@ +{"splits": ["train", "val"]} \ No newline at end of file diff --git a/training_data_cleaned/solubility/sol_wt_with_embeddings/train/data-00000-of-00001.arrow b/training_data_cleaned/solubility/sol_wt_with_embeddings/train/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..1919b3768016e406bc821196215b8b130a36393d --- /dev/null +++ b/training_data_cleaned/solubility/sol_wt_with_embeddings/train/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c98ec7e6f1d0fd673fbfcc17bddc296163c84f932c90d767002214004749fe8c +size 77876848 diff --git a/training_data_cleaned/solubility/sol_wt_with_embeddings/train/dataset_info.json b/training_data_cleaned/solubility/sol_wt_with_embeddings/train/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/solubility/sol_wt_with_embeddings/train/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/solubility/sol_wt_with_embeddings/train/state.json b/training_data_cleaned/solubility/sol_wt_with_embeddings/train/state.json new file mode 100644 index 0000000000000000000000000000000000000000..69f1a5d8ad6bccf805a6d4844c1fb7f0c822d873 --- /dev/null +++ b/training_data_cleaned/solubility/sol_wt_with_embeddings/train/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "2e7260d06cdf4b0a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_data_cleaned/solubility/sol_wt_with_embeddings/val/data-00000-of-00001.arrow b/training_data_cleaned/solubility/sol_wt_with_embeddings/val/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..46cfffd97a92e24de58e737104dfc836027dc887 --- /dev/null +++ b/training_data_cleaned/solubility/sol_wt_with_embeddings/val/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c76cc6d883b1ad797f40d5f25cdbfba5e08989436004b7b8e1ee0cb509c079 +size 19471728 diff --git a/training_data_cleaned/solubility/sol_wt_with_embeddings/val/dataset_info.json b/training_data_cleaned/solubility/sol_wt_with_embeddings/val/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..3b1be902ba0361c345a756bbe936d9ca5c70958d --- /dev/null +++ b/training_data_cleaned/solubility/sol_wt_with_embeddings/val/dataset_info.json @@ -0,0 +1,23 @@ +{ + "citation": "", + "description": "", + "features": { + "sequence": { + "dtype": "string", + "_type": "Value" + }, + "embedding": { + "feature": { + "dtype": "float32", + "_type": "Value" + }, + "_type": "List" + }, + "label": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/training_data_cleaned/solubility/sol_wt_with_embeddings/val/state.json b/training_data_cleaned/solubility/sol_wt_with_embeddings/val/state.json new file mode 100644 index 0000000000000000000000000000000000000000..ef620b13300b103e96e8fb7d345cc70ebca06303 --- /dev/null +++ b/training_data_cleaned/solubility/sol_wt_with_embeddings/val/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "499986169da9afde", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file