natelgrw committed on
Commit
8401b89
·
1 Parent(s): e966be6

Version 1.0.0

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gitattributes CHANGED
@@ -7,6 +7,7 @@
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
 
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
 
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.json filter=lfs diff=lfs merge=lfs -text
11
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
12
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
  *.model filter=lfs diff=lfs merge=lfs -text
AMAX_MLP1/summary.json DELETED
@@ -1,61 +0,0 @@
1
- {
2
- "model_info": {
3
- "name": "AMAX_MLP1",
4
- "type": "Multi-Layer Perceptron",
5
- "framework": "PyTorch",
6
- "architecture": "Sequential Neural Network",
7
- "created_date": "2024-10-12"
8
- },
9
- "architecture": {
10
- "input_size": 312,
11
- "hidden_layers": [1024, 512],
12
- "output_size": 1,
13
- "activation": "tanh",
14
- "dropout_rate": 0.1,
15
- "total_parameters": "~1.2M",
16
- "model_depth": 4
17
- },
18
- "training_config": {
19
- "optimizer": "Adam",
20
- "learning_rate": 0.01,
21
- "batch_size": 2048,
22
- "max_epochs": 150,
23
- "patience": 25,
24
- "early_stopping": true,
25
- "device": "cuda",
26
- "data_leakage": false
27
- },
28
- "performance_metrics": {
29
- "validation": {
30
- "r2_score": 0.8664,
31
- "mae": 26.187,
32
- "rmse": 42.885,
33
- "realistic": true
34
- },
35
- "test": {
36
- "r2_score": 0.8913,
37
- "mae": 23.956,
38
- "rmse": 38.680,
39
- "realistic": true
40
- }
41
- },
42
- "data_info": {
43
- "training_samples": 32010,
44
- "validation_samples": 4001,
45
- "test_samples": 4002,
46
- "features": 312,
47
- "feature_type": "RDKit molecular descriptors",
48
- "target": "lambda_max (nm)",
49
- "data_split": "random"
50
- },
51
- "feature_importance": {
52
- "method": "permutation_importance",
53
- "top_descriptors": [
54
- "BertzCT_comp",
55
- "NumHAcceptors_comp",
56
- "SLogP_VSA8_comp",
57
- "SMR_VSA8_comp",
58
- "fr_halogen_comp"
59
- ]
60
- }
61
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
AMAX_RF1/AMAX_RF1.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:745133fdcad01a03aec3d82bb8311c62795869c20698b1de9081ff8ded4e7473
3
- size 1226818065
 
 
 
 
AMAX_RF1/summary.json DELETED
@@ -1,63 +0,0 @@
1
- {
2
- "model_info": {
3
- "name": "AMAX_RF1",
4
- "type": "Random Forest Regressor",
5
- "framework": "Scikit-learn",
6
- "architecture": "Ensemble of Decision Trees",
7
- "created_date": "2024-10-13",
8
- "status": "Production Ready (Retrained)"
9
- },
10
- "architecture": {
11
- "n_estimators": 500,
12
- "max_depth": null,
13
- "min_samples_split": 2,
14
- "min_samples_leaf": 1,
15
- "max_features": "sqrt",
16
- "bootstrap": true,
17
- "random_state": 42,
18
- "n_jobs": 32
19
- },
20
- "training_config": {
21
- "hyperparameter_search": "GridSearchCV",
22
- "cv_folds": 3,
23
- "scoring_metric": "neg_mean_absolute_error",
24
- "parameter_combinations": 32,
25
- "training_data_only": true,
26
- "data_leakage": false,
27
- "retrained": true
28
- },
29
- "performance_metrics": {
30
- "validation": {
31
- "r2_score": 0.8865,
32
- "mae": 20.412,
33
- "rmse": 39.519,
34
- "realistic": true,
35
- "note": "Expected after retraining without data leakage"
36
- },
37
- "test": {
38
- "r2_score": 0.9035,
39
- "mae": 18.601,
40
- "rmse": 36.441,
41
- "realistic": true
42
- }
43
- },
44
- "data_info": {
45
- "training_samples": 32010,
46
- "validation_samples": 4001,
47
- "test_samples": 4002,
48
- "features": 312,
49
- "feature_type": "RDKit molecular descriptors",
50
- "target": "lambda_max (nm)",
51
- "data_split": "random"
52
- },
53
- "feature_importance": {
54
- "method": "built_in_feature_importances",
55
- "top_descriptors": [
56
- "NumAliphaticRings_comp",
57
- "MaxEStateIndex_comp",
58
- "NumAliphaticHeterocycles_comp",
59
- "PEOE_VSA8_comp",
60
- "SMR_VSA9_comp"
61
- ]
62
- }
63
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
AMAX_XGB1/summary.json DELETED
@@ -1,66 +0,0 @@
1
- {
2
- "model_info": {
3
- "name": "AMAX_XGB1",
4
- "type": "XGBoost Regressor",
5
- "framework": "XGBoost",
6
- "architecture": "Gradient Boosting Decision Trees",
7
- "created_date": "2024-10-13",
8
- "status": "Production Ready (Retrained)"
9
- },
10
- "architecture": {
11
- "n_estimators": 500,
12
- "max_depth": 9,
13
- "learning_rate": 0.1,
14
- "subsample": 0.8,
15
- "colsample_bytree": 0.8,
16
- "reg_alpha": 0.0,
17
- "reg_lambda": 1.0,
18
- "random_state": 42,
19
- "tree_method": "hist",
20
- "device": "cuda"
21
- },
22
- "training_config": {
23
- "hyperparameter_search": "GridSearchCV",
24
- "cv_folds": 3,
25
- "scoring_metric": "neg_mean_absolute_error",
26
- "parameter_combinations": 32,
27
- "training_data_only": true,
28
- "data_leakage": false,
29
- "retrained": true,
30
- "gpu_acceleration": true
31
- },
32
- "performance_metrics": {
33
- "validation": {
34
- "r2_score": 0.8882,
35
- "mae": 19.567,
36
- "rmse": 39.219,
37
- "realistic": true,
38
- "note": "Expected after retraining without data leakage"
39
- },
40
- "test": {
41
- "r2_score": 0.9084,
42
- "mae": 17.682,
43
- "rmse": 35.507,
44
- "realistic": true
45
- }
46
- },
47
- "data_info": {
48
- "training_samples": 32010,
49
- "validation_samples": 4001,
50
- "test_samples": 4002,
51
- "features": 312,
52
- "feature_type": "RDKit molecular descriptors",
53
- "target": "lambda_max (nm)",
54
- "data_split": "random"
55
- },
56
- "feature_importance": {
57
- "method": "built_in_feature_importances",
58
- "top_descriptors": [
59
- "NumAliphaticRings_comp",
60
- "fr_Imine_comp",
61
- "NumAliphaticHeterocycles_comp",
62
- "fr_azo_comp",
63
- "fr_COO_comp"
64
- ]
65
- }
66
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -25,7 +25,7 @@ All models utilize **312 RDKit molecular descriptors** combining both compound a
25
  If you use an AMAX prediction model in your research, please cite:
26
 
27
  ```bibtex
28
- @misc{amaxmodels,
29
  title={AMAX-Models: Machine Learning Models for Molecular Absorption Wavelength Prediction},
30
  author={Leung, Nathan},
31
  institution={Coley Research Group @ MIT}
 
25
  If you use an AMAX prediction model in your research, please cite:
26
 
27
  ```bibtex
28
+ @modelcollection{amaxmodels,
29
  title={AMAX-Models: Machine Learning Models for Molecular Absorption Wavelength Prediction},
30
  author={Leung, Nathan},
31
  institution={Coley Research Group @ MIT}
AMAX_MLP1/AMAX_MLP1.pth → models/AMAX_MLP1/AMAX_MLP1.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf9816fdd6f1243656b8553a8f3f32caf288ed0928f10927939ba4cf53bbc0e0
3
- size 3405421
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc151395fa8679dea813ca62688f30d7e61266101c232f68fd536703a88825b7
3
+ size 8211712
AMAX_XGB1/AMAX_XGB1.pkl → models/AMAX_XGB1/AMAX_XGB1.json RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7a42db68102b384847ac68a3940887682cdf0d201fa0a1e1307ed87de1927c6
3
- size 8038457
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3e20d9de1e244b7fc5bf4504a763f975ddcf521af6b5ce6e344127639ea243b
3
+ size 17963573