Version 1.0.0

Browse files

Files changed (9)

.DS_Store +0 -0
.gitattributes +1 -0
AMAX_MLP1/summary.json +0 -61
AMAX_RF1/AMAX_RF1.pkl +0 -3
AMAX_RF1/summary.json +0 -63
AMAX_XGB1/summary.json +0 -66
README.md +1 -1
AMAX_MLP1/AMAX_MLP1.pth → models/AMAX_MLP1/AMAX_MLP1.pt +2 -2
AMAX_XGB1/AMAX_XGB1.pkl → models/AMAX_XGB1/AMAX_XGB1.json +2 -2

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

.gitattributes CHANGED Viewed

@@ -7,6 +7,7 @@
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text

 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text

AMAX_MLP1/summary.json DELETED Viewed

@@ -1,61 +0,0 @@
-{
-  "model_info": {
-    "name": "AMAX_MLP1",
-    "type": "Multi-Layer Perceptron",
-    "framework": "PyTorch",
-    "architecture": "Sequential Neural Network",
-    "created_date": "2024-10-12"
-  },
-  "architecture": {
-    "input_size": 312,
-    "hidden_layers": [1024, 512],
-    "output_size": 1,
-    "activation": "tanh",
-    "dropout_rate": 0.1,
-    "total_parameters": "~1.2M",
-    "model_depth": 4
-  },
-  "training_config": {
-    "optimizer": "Adam",
-    "learning_rate": 0.01,
-    "batch_size": 2048,
-    "max_epochs": 150,
-    "patience": 25,
-    "early_stopping": true,
-    "device": "cuda",
-    "data_leakage": false
-  },
-  "performance_metrics": {
-    "validation": {
-      "r2_score": 0.8664,
-      "mae": 26.187,
-      "rmse": 42.885,
-      "realistic": true
-    },
-    "test": {
-      "r2_score": 0.8913,
-      "mae": 23.956,
-      "rmse": 38.680,
-      "realistic": true
-    }
-  },
-  "data_info": {
-    "training_samples": 32010,
-    "validation_samples": 4001,
-    "test_samples": 4002,
-    "features": 312,
-    "feature_type": "RDKit molecular descriptors",
-    "target": "lambda_max (nm)",
-    "data_split": "random"
-  },
-  "feature_importance": {
-    "method": "permutation_importance",
-    "top_descriptors": [
-      "BertzCT_comp",
-      "NumHAcceptors_comp",
-      "SLogP_VSA8_comp",
-      "SMR_VSA8_comp",
-      "fr_halogen_comp"
-    ]
-  }
-}

AMAX_RF1/AMAX_RF1.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:745133fdcad01a03aec3d82bb8311c62795869c20698b1de9081ff8ded4e7473
-size 1226818065

AMAX_RF1/summary.json DELETED Viewed

@@ -1,63 +0,0 @@
-{
-  "model_info": {
-    "name": "AMAX_RF1",
-    "type": "Random Forest Regressor",
-    "framework": "Scikit-learn",
-    "architecture": "Ensemble of Decision Trees",
-    "created_date": "2024-10-13",
-    "status": "Production Ready (Retrained)"
-  },
-  "architecture": {
-    "n_estimators": 500,
-    "max_depth": null,
-    "min_samples_split": 2,
-    "min_samples_leaf": 1,
-    "max_features": "sqrt",
-    "bootstrap": true,
-    "random_state": 42,
-    "n_jobs": 32
-  },
-  "training_config": {
-    "hyperparameter_search": "GridSearchCV",
-    "cv_folds": 3,
-    "scoring_metric": "neg_mean_absolute_error",
-    "parameter_combinations": 32,
-    "training_data_only": true,
-    "data_leakage": false,
-    "retrained": true
-  },
-  "performance_metrics": {
-    "validation": {
-      "r2_score": 0.8865,
-      "mae": 20.412,
-      "rmse": 39.519,
-      "realistic": true,
-      "note": "Expected after retraining without data leakage"
-    },
-    "test": {
-      "r2_score": 0.9035,
-      "mae": 18.601,
-      "rmse": 36.441,
-      "realistic": true
-    }
-  },
-  "data_info": {
-    "training_samples": 32010,
-    "validation_samples": 4001,
-    "test_samples": 4002,
-    "features": 312,
-    "feature_type": "RDKit molecular descriptors",
-    "target": "lambda_max (nm)",
-    "data_split": "random"
-  },
-  "feature_importance": {
-    "method": "built_in_feature_importances",
-    "top_descriptors": [
-      "NumAliphaticRings_comp",
-      "MaxEStateIndex_comp",
-      "NumAliphaticHeterocycles_comp",
-      "PEOE_VSA8_comp",
-      "SMR_VSA9_comp"
-    ]
-  }
-}

AMAX_XGB1/summary.json DELETED Viewed

@@ -1,66 +0,0 @@
-{
-  "model_info": {
-    "name": "AMAX_XGB1",
-    "type": "XGBoost Regressor",
-    "framework": "XGBoost",
-    "architecture": "Gradient Boosting Decision Trees",
-    "created_date": "2024-10-13",
-    "status": "Production Ready (Retrained)"
-  },
-  "architecture": {
-    "n_estimators": 500,
-    "max_depth": 9,
-    "learning_rate": 0.1,
-    "subsample": 0.8,
-    "colsample_bytree": 0.8,
-    "reg_alpha": 0.0,
-    "reg_lambda": 1.0,
-    "random_state": 42,
-    "tree_method": "hist",
-    "device": "cuda"
-  },
-  "training_config": {
-    "hyperparameter_search": "GridSearchCV",
-    "cv_folds": 3,
-    "scoring_metric": "neg_mean_absolute_error",
-    "parameter_combinations": 32,
-    "training_data_only": true,
-    "data_leakage": false,
-    "retrained": true,
-    "gpu_acceleration": true
-  },
-  "performance_metrics": {
-    "validation": {
-      "r2_score": 0.8882,
-      "mae": 19.567,
-      "rmse": 39.219,
-      "realistic": true,
-      "note": "Expected after retraining without data leakage"
-    },
-    "test": {
-      "r2_score": 0.9084,
-      "mae": 17.682,
-      "rmse": 35.507,
-      "realistic": true
-    }
-  },
-  "data_info": {
-    "training_samples": 32010,
-    "validation_samples": 4001,
-    "test_samples": 4002,
-    "features": 312,
-    "feature_type": "RDKit molecular descriptors",
-    "target": "lambda_max (nm)",
-    "data_split": "random"
-  },
-  "feature_importance": {
-    "method": "built_in_feature_importances",
-    "top_descriptors": [
-      "NumAliphaticRings_comp",
-      "fr_Imine_comp",
-      "NumAliphaticHeterocycles_comp",
-      "fr_azo_comp",
-      "fr_COO_comp"
-    ]
-  }
-}

README.md CHANGED Viewed

@@ -25,7 +25,7 @@ All models utilize **312 RDKit molecular descriptors** combining both compound a
 If you use an AMAX prediction model in your research, please cite:
 ```bibtex
-@misc{amaxmodels,
   title={AMAX-Models: Machine Learning Models for Molecular Absorption Wavelength Prediction},
   author={Leung, Nathan},
   institution={Coley Research Group @ MIT}

 If you use an AMAX prediction model in your research, please cite:
 ```bibtex
+@modelcollection{amaxmodels,
   title={AMAX-Models: Machine Learning Models for Molecular Absorption Wavelength Prediction},
   author={Leung, Nathan},
   institution={Coley Research Group @ MIT}

AMAX_MLP1/AMAX_MLP1.pth → models/AMAX_MLP1/AMAX_MLP1.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf9816fdd6f1243656b8553a8f3f32caf288ed0928f10927939ba4cf53bbc0e0
-size 3405421

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc151395fa8679dea813ca62688f30d7e61266101c232f68fd536703a88825b7
+size 8211712

AMAX_XGB1/AMAX_XGB1.pkl → models/AMAX_XGB1/AMAX_XGB1.json RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7a42db68102b384847ac68a3940887682cdf0d201fa0a1e1307ed87de1927c6
-size 8038457

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3e20d9de1e244b7fc5bf4504a763f975ddcf521af6b5ce6e344127639ea243b
+size 17963573