| { | |
| "project": { | |
| "name": "GDM Risk Prediction - France", | |
| "objective": "Prediction of gestational diabetes mellitus risk at the 1st trimester without biological variables", | |
| "version": "1.0.0", | |
| "trained_at": "2026-05-26T16:31:03", | |
| "status": "validated" | |
| }, | |
| "selected_model": { | |
| "name": "Logistic Regression with Isotonic Calibration", | |
| "short_name": "dg_lr_isotonic", | |
| "bundle_path": "artifacts/models/dg_lr_isotonic_bundle.joblib", | |
| "rationale": "AUC-ROC (0.759) and AUC-PR (0.334) within 0.001 of the top candidate (SVM: 0.760 / 0.335); tied-best Brier Score (0.088); interpretable for clinical context" | |
| }, | |
| "candidate_models": [ | |
| { | |
| "name": "Logistic Regression", | |
| "AUC_ROC": 0.759, | |
| "AUC_PR": 0.334, | |
| "Brier_Score": 0.088, | |
| "selected": true | |
| }, | |
| { | |
| "name": "XGBoost", | |
| "AUC_ROC": 0.753, | |
| "AUC_PR": 0.32, | |
| "Brier_Score": 0.089, | |
| "selected": false | |
| }, | |
| { | |
| "name": "Random Forest", | |
| "AUC_ROC": 0.748, | |
| "AUC_PR": 0.298, | |
| "Brier_Score": 0.09, | |
| "selected": false | |
| }, | |
| { | |
| "name": "SVM", | |
| "AUC_ROC": 0.76, | |
| "AUC_PR": 0.335, | |
| "Brier_Score": 0.088, | |
| "selected": false, | |
| "sensitivity_internal": 0.884, | |
| "specificity_internal": 0.413, | |
| "external_detection_rate": 0.947, | |
| "external_FN": 13, | |
| "DCA_points_utiles": 99 | |
| } | |
| ], | |
| "thresholding": { | |
| "decision_threshold": 0.06, | |
| "threshold_file": "artifacts/threshold.json", | |
| "rationale": "Threshold optimized for high sensitivity (88.7%) and high NPV (96.6%) to minimize false negatives in a screening context" | |
| }, | |
| "performance": { | |
| "dataset": "Internal test set (80/20 stratified split of n=30000; test n=6000)", | |
| "AUC_ROC": 0.759394470526865, | |
| "AUC_PR": 0.3337401809714899, | |
| "Brier_Score": 0.088, | |
| "sensitivity": 0.8874074074074074, | |
| "specificity": 0.4060093896713615, | |
| "VPP": 0.15922381711855396, | |
| "VPN": 0.966041108132261, | |
| "F1_score": 0.27000225377507325, | |
| "balanced_accuracy": 0.6467083985393844, | |
| "MCC": 0.19171550055591624, | |
| "TP": 599, | |
| "FP": 3163, | |
| "TN": 2162, | |
| "FN": 76, | |
| "metrics_file": "artifacts/metrics_final.json" | |
| }, | |
| "external_validation": { | |
| "cohort_positive_243": { | |
| "n": 243, | |
| "detection_rate_LR": 0.9259, | |
| "false_negatives_LR": 18 | |
| }, | |
| "cohort_mixed_455": { | |
| "n": 455, | |
| "description": "243 positive + 212 negative cases" | |
| } | |
| }, | |
| "data": { | |
| "dataset_name": "dataset_dg_france_30000_final", | |
| "n_rows": 30000, | |
| "n_features": 18, | |
| "target": "gdm_label", | |
| "target_values": [ | |
| "Non", | |
| "Oui" | |
| ], | |
| "prevalence_train": "imbalanced (~11% positive)", | |
| "class_balancing": "class_weight=balanced" | |
| }, | |
| "features": { | |
| "input_schema_path": "artifacts/feature_schema/input_schema.json", | |
| "n_features": 18, | |
| "features_used": [ | |
| "age_maternel", | |
| "parite", | |
| "niveau_etude", | |
| "zone_residence", | |
| "imc", | |
| "ta_systolique", | |
| "ta_diastolique", | |
| "hta_chronique", | |
| "sedentarite", | |
| "tabagisme", | |
| "alcoolisme", | |
| "atcd_gdm", | |
| "atcd_macrosomie", | |
| "atcd_preeclampsie", | |
| "atcd_familial_diabete_1er_deg", | |
| "sopk", | |
| "grossesse_multiple", | |
| "sa_premiere_consult" | |
| ], | |
| "excluded": { | |
| "identifiers": [ | |
| "patient_id", | |
| "centre_id", | |
| "pays" | |
| ], | |
| "administrative": [ | |
| "annee_inclusion" | |
| ], | |
| "biological_leakage": [ | |
| "glycemie_jeun_1T", | |
| "ogtt_0min", | |
| "ogtt_60min", | |
| "ogtt_120min", | |
| "critere_dg" | |
| ], | |
| "collinear": [ | |
| "poids_kg", | |
| "taille_cm" | |
| ] | |
| } | |
| }, | |
| "preprocessing": { | |
| "pipeline": "sklearn.pipeline.Pipeline (embedded in bundle)", | |
| "categorical_missing": "filled with Non_renseigne before encoding", | |
| "numeric_missing": "SimpleImputer(strategy=median)", | |
| "encoding": "OneHotEncoder(handle_unknown=ignore)", | |
| "scaling": "StandardScaler on numeric features", | |
| "note": "Full preprocessing pipeline is saved inside the .joblib bundle — no separate preprocessing step needed at inference" | |
| }, | |
| "inference": { | |
| "how_to_load": "import joblib; bundle = joblib.load(\"artifacts/models/dg_lr_isotonic_bundle.joblib\")", | |
| "bundle_keys": [ | |
| "model", | |
| "preprocessor", | |
| "feature_names", | |
| "threshold", | |
| "calibrator" | |
| ], | |
| "prediction_example": "proba = bundle[\"model\"].predict_proba(X_preprocessed)[:,1]; label = int(proba[0] >= bundle[\"threshold\"])" | |
| }, | |
| "compliance": { | |
| "disclaimer": "Research prototype only. Not validated as a medical device. Not to be used for autonomous clinical decision-making without appropriate medical, ethical, institutional and regulatory validation.", | |
| "data_not_included": true, | |
| "data_reason": "Raw and processed data contain potentially sensitive patient information and are excluded from version control per GDPR principles" | |
| } | |
| } |