yinuozhang committed on
Commit
b8c6018
·
1 Parent(s): 284f956

update unpooled data

Browse files
Files changed (50) hide show
  1. training_data_cleaned/data_split.ipynb +3 -0
  2. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/data-00000-of-00001.arrow +1 -1
  3. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/dataset_info.json +4 -4
  4. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/state.json +1 -1
  5. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/data-00000-of-00001.arrow +1 -1
  6. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/dataset_info.json +4 -4
  7. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/state.json +1 -1
  8. training_data_cleaned/{nf/nf_smiles_with_embeddings → hemolysis/hemo_smiles_with_embeddings_unpooled}/dataset_dict.json +0 -0
  9. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/train/data-00000-of-00003.arrow +3 -0
  10. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/train/data-00001-of-00003.arrow +3 -0
  11. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/train/data-00002-of-00003.arrow +3 -0
  12. training_data_cleaned/{nf/nf_smiles_with_embeddings/val → hemolysis/hemo_smiles_with_embeddings_unpooled/train}/dataset_info.json +16 -2
  13. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/train/state.json +19 -0
  14. training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/val/data-00000-of-00001.arrow +3 -0
  15. training_data_cleaned/{nf/nf_smiles_with_embeddings/train → hemolysis/hemo_smiles_with_embeddings_unpooled/val}/dataset_info.json +16 -2
  16. training_data_cleaned/{nf/nf_smiles_with_embeddings/train → hemolysis/hemo_smiles_with_embeddings_unpooled/val}/state.json +1 -1
  17. training_data_cleaned/nf_smiles_train.csv +3 -0
  18. training_data_cleaned/nf_smiles_val.csv +3 -0
  19. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/data-00000-of-00001.arrow +1 -1
  20. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/dataset_info.json +4 -4
  21. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/state.json +1 -1
  22. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/data-00000-of-00001.arrow +1 -1
  23. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/dataset_info.json +4 -4
  24. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/state.json +1 -1
  25. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings_unpooled/dataset_dict.json +1 -0
  26. training_data_cleaned/{nf/nf_smiles_with_embeddings → permeability_caco2/caco2_smiles_with_embeddings_unpooled}/train/data-00000-of-00001.arrow +2 -2
  27. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings_unpooled/train/dataset_info.json +37 -0
  28. training_data_cleaned/{nf/nf_smiles_with_embeddings/val → permeability_caco2/caco2_smiles_with_embeddings_unpooled/train}/state.json +1 -1
  29. training_data_cleaned/{nf/nf_smiles_with_embeddings → permeability_caco2/caco2_smiles_with_embeddings_unpooled}/val/data-00000-of-00001.arrow +2 -2
  30. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings_unpooled/val/dataset_info.json +37 -0
  31. training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings_unpooled/val/state.json +13 -0
  32. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/data-00000-of-00001.arrow +2 -2
  33. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/dataset_info.json +4 -4
  34. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/state.json +1 -1
  35. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/data-00000-of-00001.arrow +2 -2
  36. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/dataset_info.json +4 -4
  37. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/state.json +1 -1
  38. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/dataset_dict.json +1 -0
  39. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/train/data-00000-of-00002.arrow +3 -0
  40. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/train/data-00001-of-00002.arrow +3 -0
  41. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/train/dataset_info.json +37 -0
  42. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/train/state.json +16 -0
  43. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/val/data-00000-of-00001.arrow +3 -0
  44. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/val/dataset_info.json +37 -0
  45. training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/val/state.json +13 -0
  46. training_data_cleaned/smiles_data_split.ipynb +3 -0
  47. training_data_cleaned/solubility/sol_wt_with_embeddings/train/data-00000-of-00001.arrow +1 -1
  48. training_data_cleaned/solubility/sol_wt_with_embeddings/train/state.json +1 -1
  49. training_data_cleaned/solubility/sol_wt_with_embeddings/val/data-00000-of-00001.arrow +1 -1
  50. training_data_cleaned/solubility/sol_wt_with_embeddings/val/state.json +1 -1
training_data_cleaned/data_split.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:981339bf1a6594e42a722a42993c238512c3ac572344f68b810f561d4b7b7757
3
+ size 228787
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/data-00000-of-00001.arrow CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1af97dd02933072c0522101cb6c382703093fcdeee3185e509c6edd6fc070b8a
3
  size 16876472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517c82caa91e84e9453f70d1884b249b94e2c5b276a495cef91dbddfde8954e4
3
  size 16876472
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/dataset_info.json CHANGED
@@ -6,16 +6,16 @@
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
 
 
 
 
9
  "embedding": {
10
  "feature": {
11
  "dtype": "float32",
12
  "_type": "Value"
13
  },
14
  "_type": "List"
15
- },
16
- "label": {
17
- "dtype": "int64",
18
- "_type": "Value"
19
  }
20
  },
21
  "homepage": "",
 
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
9
+ "label": {
10
+ "dtype": "int64",
11
+ "_type": "Value"
12
+ },
13
  "embedding": {
14
  "feature": {
15
  "dtype": "float32",
16
  "_type": "Value"
17
  },
18
  "_type": "List"
 
 
 
 
19
  }
20
  },
21
  "homepage": "",
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/train/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "48acc3da44ca47b8",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "b35e488d604a9b33",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/data-00000-of-00001.arrow CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40cb576c6993d53b26152f4cd954ba372c2b7817811ee4da9513036e9d2cc573
3
  size 4157120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98048aa731465845cb6c6d0fe71d0482bf6f0c30229dda1302b60ccb44aee185
3
  size 4157120
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/dataset_info.json CHANGED
@@ -6,16 +6,16 @@
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
 
 
 
 
9
  "embedding": {
10
  "feature": {
11
  "dtype": "float32",
12
  "_type": "Value"
13
  },
14
  "_type": "List"
15
- },
16
- "label": {
17
- "dtype": "int64",
18
- "_type": "Value"
19
  }
20
  },
21
  "homepage": "",
 
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
9
+ "label": {
10
+ "dtype": "int64",
11
+ "_type": "Value"
12
+ },
13
  "embedding": {
14
  "feature": {
15
  "dtype": "float32",
16
  "_type": "Value"
17
  },
18
  "_type": "List"
 
 
 
 
19
  }
20
  },
21
  "homepage": "",
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings/val/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "fd2db53d34e0b66a",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "9f8c3afbbc43100a",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
training_data_cleaned/{nf/nf_smiles_with_embeddings → hemolysis/hemo_smiles_with_embeddings_unpooled}/dataset_dict.json RENAMED
File without changes
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/train/data-00000-of-00003.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c964ad117df4f9dedc51f5d0d1ecb1273ab06a1e19b19e2d413aa68cb35ee62
3
+ size 348437176
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/train/data-00001-of-00003.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ea7fb2d8111356133e934368a8e3890dffd2eb8935a8ff68cf76ec5b11f7565
3
+ size 357685496
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/train/data-00002-of-00003.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2940eaa57037d9534e64cde042c33958e4284c7c32bf95fd9b0294f4abda3cd0
3
+ size 354685520
training_data_cleaned/{nf/nf_smiles_with_embeddings/val → hemolysis/hemo_smiles_with_embeddings_unpooled/train}/dataset_info.json RENAMED
@@ -6,14 +6,28 @@
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
 
 
 
 
9
  "embedding": {
10
  "feature": {
11
- "dtype": "float32",
 
 
 
 
 
 
 
 
 
 
12
  "_type": "Value"
13
  },
14
  "_type": "List"
15
  },
16
- "label": {
17
  "dtype": "int64",
18
  "_type": "Value"
19
  }
 
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
9
+ "label": {
10
+ "dtype": "int64",
11
+ "_type": "Value"
12
+ },
13
  "embedding": {
14
  "feature": {
15
+ "feature": {
16
+ "dtype": "float16",
17
+ "_type": "Value"
18
+ },
19
+ "_type": "List"
20
+ },
21
+ "_type": "List"
22
+ },
23
+ "attention_mask": {
24
+ "feature": {
25
+ "dtype": "int8",
26
  "_type": "Value"
27
  },
28
  "_type": "List"
29
  },
30
+ "length": {
31
  "dtype": "int64",
32
  "_type": "Value"
33
  }
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/train/state.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00003.arrow"
5
+ },
6
+ {
7
+ "filename": "data-00001-of-00003.arrow"
8
+ },
9
+ {
10
+ "filename": "data-00002-of-00003.arrow"
11
+ }
12
+ ],
13
+ "_fingerprint": "083e82cfa8182e35",
14
+ "_format_columns": null,
15
+ "_format_kwargs": {},
16
+ "_format_type": null,
17
+ "_output_all_columns": false,
18
+ "_split": null
19
+ }
training_data_cleaned/hemolysis/hemo_smiles_with_embeddings_unpooled/val/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a0a63196762e2981219166d73fe419215d3389e638a2d65aa2e027c9ce1a4b0
3
+ size 222151392
training_data_cleaned/{nf/nf_smiles_with_embeddings/train → hemolysis/hemo_smiles_with_embeddings_unpooled/val}/dataset_info.json RENAMED
@@ -6,14 +6,28 @@
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
 
 
 
 
9
  "embedding": {
10
  "feature": {
11
- "dtype": "float32",
 
 
 
 
 
 
 
 
 
 
12
  "_type": "Value"
13
  },
14
  "_type": "List"
15
  },
16
- "label": {
17
  "dtype": "int64",
18
  "_type": "Value"
19
  }
 
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
9
+ "label": {
10
+ "dtype": "int64",
11
+ "_type": "Value"
12
+ },
13
  "embedding": {
14
  "feature": {
15
+ "feature": {
16
+ "dtype": "float16",
17
+ "_type": "Value"
18
+ },
19
+ "_type": "List"
20
+ },
21
+ "_type": "List"
22
+ },
23
+ "attention_mask": {
24
+ "feature": {
25
+ "dtype": "int8",
26
  "_type": "Value"
27
  },
28
  "_type": "List"
29
  },
30
+ "length": {
31
  "dtype": "int64",
32
  "_type": "Value"
33
  }
training_data_cleaned/{nf/nf_smiles_with_embeddings/train → hemolysis/hemo_smiles_with_embeddings_unpooled/val}/state.json RENAMED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "fb6f5e1e2e124220",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "22e869fb69c5442d",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
training_data_cleaned/nf_smiles_train.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f08b8d9b77fef6da407a6e22765201d8eaf1cff6ae7f0da5d8da261baf64f86
3
+ size 2069832
training_data_cleaned/nf_smiles_val.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a2cf82b6cc31686eff6a7931de34ee0975defc460e470e183b208c0513e5f3b
3
+ size 55387144
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/data-00000-of-00001.arrow CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1ee6d2739e733de4465bc3b3229614c02efd9397c10178b2aac1509a9878b68
3
  size 1592344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df073d6960518a4d7869843a243419b7dbbebd000d7b550b67dc1bb0ecb8a0c5
3
  size 1592344
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/dataset_info.json CHANGED
@@ -6,16 +6,16 @@
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
 
 
 
 
9
  "embedding": {
10
  "feature": {
11
  "dtype": "float32",
12
  "_type": "Value"
13
  },
14
  "_type": "List"
15
- },
16
- "label": {
17
- "dtype": "float64",
18
- "_type": "Value"
19
  }
20
  },
21
  "homepage": "",
 
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
9
+ "label": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
  "embedding": {
14
  "feature": {
15
  "dtype": "float32",
16
  "_type": "Value"
17
  },
18
  "_type": "List"
 
 
 
 
19
  }
20
  },
21
  "homepage": "",
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/train/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "b67b8e734ab59271",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "4aee0d5db1384174",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/data-00000-of-00001.arrow CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e534dfe6744d9fbb89ecea63e54aeac9fcd319dd018f0c7939dfad856eb9599d
3
  size 392528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19e3ca2644ed58d88038ae92c36f0ac1b818e6318b5bb14b7989ba7ee90ad749
3
  size 392528
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/dataset_info.json CHANGED
@@ -6,16 +6,16 @@
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
 
 
 
 
9
  "embedding": {
10
  "feature": {
11
  "dtype": "float32",
12
  "_type": "Value"
13
  },
14
  "_type": "List"
15
- },
16
- "label": {
17
- "dtype": "float64",
18
- "_type": "Value"
19
  }
20
  },
21
  "homepage": "",
 
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
9
+ "label": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
  "embedding": {
14
  "feature": {
15
  "dtype": "float32",
16
  "_type": "Value"
17
  },
18
  "_type": "List"
 
 
 
 
19
  }
20
  },
21
  "homepage": "",
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings/val/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "c5abdd50b2a6a84c",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "9a3005e23a39689f",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings_unpooled/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "val"]}
training_data_cleaned/{nf/nf_smiles_with_embeddings → permeability_caco2/caco2_smiles_with_embeddings_unpooled}/train/data-00000-of-00001.arrow RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d682c81cf7d3c0ce5f1f11ebf2b097d7f100f2d2a78a8ad8a40b1f38b94cc5a
3
- size 23581248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5b2070cae7b5ff65bcd91fd2565f1a0da824ece11d46df7dc5d474f0e6f936f
3
+ size 61446768
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings_unpooled/train/dataset_info.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "sequence": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "label": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
+ "embedding": {
14
+ "feature": {
15
+ "feature": {
16
+ "dtype": "float16",
17
+ "_type": "Value"
18
+ },
19
+ "_type": "List"
20
+ },
21
+ "_type": "List"
22
+ },
23
+ "attention_mask": {
24
+ "feature": {
25
+ "dtype": "int8",
26
+ "_type": "Value"
27
+ },
28
+ "_type": "List"
29
+ },
30
+ "length": {
31
+ "dtype": "int64",
32
+ "_type": "Value"
33
+ }
34
+ },
35
+ "homepage": "",
36
+ "license": ""
37
+ }
training_data_cleaned/{nf/nf_smiles_with_embeddings/val → permeability_caco2/caco2_smiles_with_embeddings_unpooled/train}/state.json RENAMED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "51b64b0e80ee5ffd",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "1bb4c565231b6073",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
training_data_cleaned/{nf/nf_smiles_with_embeddings → permeability_caco2/caco2_smiles_with_embeddings_unpooled}/val/data-00000-of-00001.arrow RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd5724627360561f961366f3c57b8de1ed0f6187bbf670bd3a4254da1d0ba571
3
- size 57618824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeaab9c33f1ff9332267736717bb816ff11e260911c30d42f288c55296a6eaac
3
+ size 9325880
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings_unpooled/val/dataset_info.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "sequence": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "label": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
+ "embedding": {
14
+ "feature": {
15
+ "feature": {
16
+ "dtype": "float16",
17
+ "_type": "Value"
18
+ },
19
+ "_type": "List"
20
+ },
21
+ "_type": "List"
22
+ },
23
+ "attention_mask": {
24
+ "feature": {
25
+ "dtype": "int8",
26
+ "_type": "Value"
27
+ },
28
+ "_type": "List"
29
+ },
30
+ "length": {
31
+ "dtype": "int64",
32
+ "_type": "Value"
33
+ }
34
+ },
35
+ "homepage": "",
36
+ "license": ""
37
+ }
training_data_cleaned/permeability_caco2/caco2_smiles_with_embeddings_unpooled/val/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "0c58b252fb2fe88a",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/data-00000-of-00001.arrow CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac8d9409bc3e1f8ea60bf13881e59b431b494093d8cb211ecad75d9940ee9957
3
- size 16838472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8644bf3f4debadf2e3b2bc282260431d87f71dd1b55a16276f654447d550198
3
+ size 17798720
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/dataset_info.json CHANGED
@@ -6,16 +6,16 @@
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
 
 
 
 
9
  "embedding": {
10
  "feature": {
11
  "dtype": "float32",
12
  "_type": "Value"
13
  },
14
  "_type": "List"
15
- },
16
- "label": {
17
- "dtype": "float64",
18
- "_type": "Value"
19
  }
20
  },
21
  "homepage": "",
 
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
9
+ "label": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
  "embedding": {
14
  "feature": {
15
  "dtype": "float32",
16
  "_type": "Value"
17
  },
18
  "_type": "List"
 
 
 
 
19
  }
20
  },
21
  "homepage": "",
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/train/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "b3fc67db512e6dff",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "0ef3d9304dd473ab",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/data-00000-of-00001.arrow CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93ce6ce347c39cc97353985efbd9d57a18fa61c0cec0c258293008d06d04a1fe
3
- size 5412880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:559e863eaeee14d185439ac549acc90109fb4ab9a0aa3746b9b0a910f05c777d
3
+ size 4452520
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/dataset_info.json CHANGED
@@ -6,16 +6,16 @@
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
 
 
 
 
9
  "embedding": {
10
  "feature": {
11
  "dtype": "float32",
12
  "_type": "Value"
13
  },
14
  "_type": "List"
15
- },
16
- "label": {
17
- "dtype": "float64",
18
- "_type": "Value"
19
  }
20
  },
21
  "homepage": "",
 
6
  "dtype": "string",
7
  "_type": "Value"
8
  },
9
+ "label": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
  "embedding": {
14
  "feature": {
15
  "dtype": "float32",
16
  "_type": "Value"
17
  },
18
  "_type": "List"
 
 
 
 
19
  }
20
  },
21
  "homepage": "",
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings/val/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "10810b5ed6df45a9",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "78a9702fbaf1398b",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "val"]}
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/train/data-00000-of-00002.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e6800a856a9470b7726661126f4e011ac79fc7fb917c0eba9d7c57ca45c3bf6
3
+ size 248381200
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/train/data-00001-of-00002.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe26ed24d18ee2545e92a9482214e17e81ded9c0187c4180b9a84bad9a20d68e
3
+ size 261972080
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/train/dataset_info.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "sequence": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "label": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
+ "embedding": {
14
+ "feature": {
15
+ "feature": {
16
+ "dtype": "float16",
17
+ "_type": "Value"
18
+ },
19
+ "_type": "List"
20
+ },
21
+ "_type": "List"
22
+ },
23
+ "attention_mask": {
24
+ "feature": {
25
+ "dtype": "int8",
26
+ "_type": "Value"
27
+ },
28
+ "_type": "List"
29
+ },
30
+ "length": {
31
+ "dtype": "int64",
32
+ "_type": "Value"
33
+ }
34
+ },
35
+ "homepage": "",
36
+ "license": ""
37
+ }
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/train/state.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00002.arrow"
5
+ },
6
+ {
7
+ "filename": "data-00001-of-00002.arrow"
8
+ }
9
+ ],
10
+ "_fingerprint": "006f9c8c2f40a84e",
11
+ "_format_columns": null,
12
+ "_format_kwargs": {},
13
+ "_format_type": null,
14
+ "_output_all_columns": false,
15
+ "_split": null
16
+ }
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/val/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0eca1f35d35326d2d0e2c617bdf06b39d1c5b67c2064171fffdcd132a14b898
3
+ size 128859184
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/val/dataset_info.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "sequence": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "label": {
10
+ "dtype": "float64",
11
+ "_type": "Value"
12
+ },
13
+ "embedding": {
14
+ "feature": {
15
+ "feature": {
16
+ "dtype": "float16",
17
+ "_type": "Value"
18
+ },
19
+ "_type": "List"
20
+ },
21
+ "_type": "List"
22
+ },
23
+ "attention_mask": {
24
+ "feature": {
25
+ "dtype": "int8",
26
+ "_type": "Value"
27
+ },
28
+ "_type": "List"
29
+ },
30
+ "length": {
31
+ "dtype": "int64",
32
+ "_type": "Value"
33
+ }
34
+ },
35
+ "homepage": "",
36
+ "license": ""
37
+ }
training_data_cleaned/permeability_pampa/pampa_smiles_with_embeddings_unpooled/val/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "1733f09af7ddad5b",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
training_data_cleaned/smiles_data_split.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83d55a03c6934dc9ee64f7dbe76d2cf8e042be84b00f8e8bb1c92e2bc6da0c3f
3
+ size 2300353
training_data_cleaned/solubility/sol_wt_with_embeddings/train/data-00000-of-00001.arrow CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c98ec7e6f1d0fd673fbfcc17bddc296163c84f932c90d767002214004749fe8c
3
  size 77876848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0de0dd0298a95c171b3e2ee7f884ed35253c58236297b3309007c450dbb9e052
3
  size 77876848
training_data_cleaned/solubility/sol_wt_with_embeddings/train/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "2e7260d06cdf4b0a",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "d90a2dc9ca4da0d0",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
training_data_cleaned/solubility/sol_wt_with_embeddings/val/data-00000-of-00001.arrow CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4c76cc6d883b1ad797f40d5f25cdbfba5e08989436004b7b8e1ee0cb509c079
3
  size 19471728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa184e300332aadb8e17e415e6fdd6364e80e7eedab4815367d4efb0d66625ae
3
  size 19471728
training_data_cleaned/solubility/sol_wt_with_embeddings/val/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "499986169da9afde",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "9284a78270833a55",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,