Version 1.0.0
Browse files
- .DS_Store +0 -0
- .gitattributes +1 -0
- AMAX_MLP1/summary.json +0 -61
- AMAX_RF1/AMAX_RF1.pkl +0 -3
- AMAX_RF1/summary.json +0 -63
- AMAX_XGB1/summary.json +0 -66
- README.md +1 -1
- AMAX_MLP1/AMAX_MLP1.pth → models/AMAX_MLP1/AMAX_MLP1.pt +2 -2
- AMAX_XGB1/AMAX_XGB1.pkl → models/AMAX_XGB1/AMAX_XGB1.json +2 -2
.DS_Store
CHANGED
|
Binary files a/.DS_Store and b/.DS_Store differ
|
|
|
.gitattributes
CHANGED
|
@@ -7,6 +7,7 @@
|
|
| 7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
*.model filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
| 11 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 12 |
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 13 |
*.model filter=lfs diff=lfs merge=lfs -text
|
AMAX_MLP1/summary.json
DELETED
|
@@ -1,61 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"model_info": {
|
| 3 |
-
"name": "AMAX_MLP1",
|
| 4 |
-
"type": "Multi-Layer Perceptron",
|
| 5 |
-
"framework": "PyTorch",
|
| 6 |
-
"architecture": "Sequential Neural Network",
|
| 7 |
-
"created_date": "2024-10-12"
|
| 8 |
-
},
|
| 9 |
-
"architecture": {
|
| 10 |
-
"input_size": 312,
|
| 11 |
-
"hidden_layers": [1024, 512],
|
| 12 |
-
"output_size": 1,
|
| 13 |
-
"activation": "tanh",
|
| 14 |
-
"dropout_rate": 0.1,
|
| 15 |
-
"total_parameters": "~1.2M",
|
| 16 |
-
"model_depth": 4
|
| 17 |
-
},
|
| 18 |
-
"training_config": {
|
| 19 |
-
"optimizer": "Adam",
|
| 20 |
-
"learning_rate": 0.01,
|
| 21 |
-
"batch_size": 2048,
|
| 22 |
-
"max_epochs": 150,
|
| 23 |
-
"patience": 25,
|
| 24 |
-
"early_stopping": true,
|
| 25 |
-
"device": "cuda",
|
| 26 |
-
"data_leakage": false
|
| 27 |
-
},
|
| 28 |
-
"performance_metrics": {
|
| 29 |
-
"validation": {
|
| 30 |
-
"r2_score": 0.8664,
|
| 31 |
-
"mae": 26.187,
|
| 32 |
-
"rmse": 42.885,
|
| 33 |
-
"realistic": true
|
| 34 |
-
},
|
| 35 |
-
"test": {
|
| 36 |
-
"r2_score": 0.8913,
|
| 37 |
-
"mae": 23.956,
|
| 38 |
-
"rmse": 38.680,
|
| 39 |
-
"realistic": true
|
| 40 |
-
}
|
| 41 |
-
},
|
| 42 |
-
"data_info": {
|
| 43 |
-
"training_samples": 32010,
|
| 44 |
-
"validation_samples": 4001,
|
| 45 |
-
"test_samples": 4002,
|
| 46 |
-
"features": 312,
|
| 47 |
-
"feature_type": "RDKit molecular descriptors",
|
| 48 |
-
"target": "lambda_max (nm)",
|
| 49 |
-
"data_split": "random"
|
| 50 |
-
},
|
| 51 |
-
"feature_importance": {
|
| 52 |
-
"method": "permutation_importance",
|
| 53 |
-
"top_descriptors": [
|
| 54 |
-
"BertzCT_comp",
|
| 55 |
-
"NumHAcceptors_comp",
|
| 56 |
-
"SLogP_VSA8_comp",
|
| 57 |
-
"SMR_VSA8_comp",
|
| 58 |
-
"fr_halogen_comp"
|
| 59 |
-
]
|
| 60 |
-
}
|
| 61 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
AMAX_RF1/AMAX_RF1.pkl
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:745133fdcad01a03aec3d82bb8311c62795869c20698b1de9081ff8ded4e7473
|
| 3 |
-
size 1226818065
|
|
|
|
|
|
|
|
|
|
|
|
AMAX_RF1/summary.json
DELETED
|
@@ -1,63 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"model_info": {
|
| 3 |
-
"name": "AMAX_RF1",
|
| 4 |
-
"type": "Random Forest Regressor",
|
| 5 |
-
"framework": "Scikit-learn",
|
| 6 |
-
"architecture": "Ensemble of Decision Trees",
|
| 7 |
-
"created_date": "2024-10-13",
|
| 8 |
-
"status": "Production Ready (Retrained)"
|
| 9 |
-
},
|
| 10 |
-
"architecture": {
|
| 11 |
-
"n_estimators": 500,
|
| 12 |
-
"max_depth": null,
|
| 13 |
-
"min_samples_split": 2,
|
| 14 |
-
"min_samples_leaf": 1,
|
| 15 |
-
"max_features": "sqrt",
|
| 16 |
-
"bootstrap": true,
|
| 17 |
-
"random_state": 42,
|
| 18 |
-
"n_jobs": 32
|
| 19 |
-
},
|
| 20 |
-
"training_config": {
|
| 21 |
-
"hyperparameter_search": "GridSearchCV",
|
| 22 |
-
"cv_folds": 3,
|
| 23 |
-
"scoring_metric": "neg_mean_absolute_error",
|
| 24 |
-
"parameter_combinations": 32,
|
| 25 |
-
"training_data_only": true,
|
| 26 |
-
"data_leakage": false,
|
| 27 |
-
"retrained": true
|
| 28 |
-
},
|
| 29 |
-
"performance_metrics": {
|
| 30 |
-
"validation": {
|
| 31 |
-
"r2_score": 0.8865,
|
| 32 |
-
"mae": 20.412,
|
| 33 |
-
"rmse": 39.519,
|
| 34 |
-
"realistic": true,
|
| 35 |
-
"note": "Expected after retraining without data leakage"
|
| 36 |
-
},
|
| 37 |
-
"test": {
|
| 38 |
-
"r2_score": 0.9035,
|
| 39 |
-
"mae": 18.601,
|
| 40 |
-
"rmse": 36.441,
|
| 41 |
-
"realistic": true
|
| 42 |
-
}
|
| 43 |
-
},
|
| 44 |
-
"data_info": {
|
| 45 |
-
"training_samples": 32010,
|
| 46 |
-
"validation_samples": 4001,
|
| 47 |
-
"test_samples": 4002,
|
| 48 |
-
"features": 312,
|
| 49 |
-
"feature_type": "RDKit molecular descriptors",
|
| 50 |
-
"target": "lambda_max (nm)",
|
| 51 |
-
"data_split": "random"
|
| 52 |
-
},
|
| 53 |
-
"feature_importance": {
|
| 54 |
-
"method": "built_in_feature_importances",
|
| 55 |
-
"top_descriptors": [
|
| 56 |
-
"NumAliphaticRings_comp",
|
| 57 |
-
"MaxEStateIndex_comp",
|
| 58 |
-
"NumAliphaticHeterocycles_comp",
|
| 59 |
-
"PEOE_VSA8_comp",
|
| 60 |
-
"SMR_VSA9_comp"
|
| 61 |
-
]
|
| 62 |
-
}
|
| 63 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
AMAX_XGB1/summary.json
DELETED
|
@@ -1,66 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"model_info": {
|
| 3 |
-
"name": "AMAX_XGB1",
|
| 4 |
-
"type": "XGBoost Regressor",
|
| 5 |
-
"framework": "XGBoost",
|
| 6 |
-
"architecture": "Gradient Boosting Decision Trees",
|
| 7 |
-
"created_date": "2024-10-13",
|
| 8 |
-
"status": "Production Ready (Retrained)"
|
| 9 |
-
},
|
| 10 |
-
"architecture": {
|
| 11 |
-
"n_estimators": 500,
|
| 12 |
-
"max_depth": 9,
|
| 13 |
-
"learning_rate": 0.1,
|
| 14 |
-
"subsample": 0.8,
|
| 15 |
-
"colsample_bytree": 0.8,
|
| 16 |
-
"reg_alpha": 0.0,
|
| 17 |
-
"reg_lambda": 1.0,
|
| 18 |
-
"random_state": 42,
|
| 19 |
-
"tree_method": "hist",
|
| 20 |
-
"device": "cuda"
|
| 21 |
-
},
|
| 22 |
-
"training_config": {
|
| 23 |
-
"hyperparameter_search": "GridSearchCV",
|
| 24 |
-
"cv_folds": 3,
|
| 25 |
-
"scoring_metric": "neg_mean_absolute_error",
|
| 26 |
-
"parameter_combinations": 32,
|
| 27 |
-
"training_data_only": true,
|
| 28 |
-
"data_leakage": false,
|
| 29 |
-
"retrained": true,
|
| 30 |
-
"gpu_acceleration": true
|
| 31 |
-
},
|
| 32 |
-
"performance_metrics": {
|
| 33 |
-
"validation": {
|
| 34 |
-
"r2_score": 0.8882,
|
| 35 |
-
"mae": 19.567,
|
| 36 |
-
"rmse": 39.219,
|
| 37 |
-
"realistic": true,
|
| 38 |
-
"note": "Expected after retraining without data leakage"
|
| 39 |
-
},
|
| 40 |
-
"test": {
|
| 41 |
-
"r2_score": 0.9084,
|
| 42 |
-
"mae": 17.682,
|
| 43 |
-
"rmse": 35.507,
|
| 44 |
-
"realistic": true
|
| 45 |
-
}
|
| 46 |
-
},
|
| 47 |
-
"data_info": {
|
| 48 |
-
"training_samples": 32010,
|
| 49 |
-
"validation_samples": 4001,
|
| 50 |
-
"test_samples": 4002,
|
| 51 |
-
"features": 312,
|
| 52 |
-
"feature_type": "RDKit molecular descriptors",
|
| 53 |
-
"target": "lambda_max (nm)",
|
| 54 |
-
"data_split": "random"
|
| 55 |
-
},
|
| 56 |
-
"feature_importance": {
|
| 57 |
-
"method": "built_in_feature_importances",
|
| 58 |
-
"top_descriptors": [
|
| 59 |
-
"NumAliphaticRings_comp",
|
| 60 |
-
"fr_Imine_comp",
|
| 61 |
-
"NumAliphaticHeterocycles_comp",
|
| 62 |
-
"fr_azo_comp",
|
| 63 |
-
"fr_COO_comp"
|
| 64 |
-
]
|
| 65 |
-
}
|
| 66 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -25,7 +25,7 @@ All models utilize **312 RDKit molecular descriptors** combining both compound a
|
|
| 25 |
If you use an AMAX prediction model in your research, please cite:
|
| 26 |
|
| 27 |
```bibtex
|
| 28 |
-
@
|
| 29 |
title={AMAX-Models: Machine Learning Models for Molecular Absorption Wavelength Prediction},
|
| 30 |
author={Leung, Nathan},
|
| 31 |
institution={Coley Research Group @ MIT}
|
|
|
|
| 25 |
If you use an AMAX prediction model in your research, please cite:
|
| 26 |
|
| 27 |
```bibtex
|
| 28 |
+
@modelcollection{amaxmodels,
|
| 29 |
title={AMAX-Models: Machine Learning Models for Molecular Absorption Wavelength Prediction},
|
| 30 |
author={Leung, Nathan},
|
| 31 |
institution={Coley Research Group @ MIT}
|
AMAX_MLP1/AMAX_MLP1.pth → models/AMAX_MLP1/AMAX_MLP1.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc151395fa8679dea813ca62688f30d7e61266101c232f68fd536703a88825b7
|
| 3 |
+
size 8211712
|
AMAX_XGB1/AMAX_XGB1.pkl → models/AMAX_XGB1/AMAX_XGB1.json
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3e20d9de1e244b7fc5bf4504a763f975ddcf521af6b5ce6e344127639ea243b
|
| 3 |
+
size 17963573
|