neon-slab-models / ensemble /ensemble_evaluation_summary.json
Herrprofessor's picture
Add Neon benchmark-facing Model C single and ensemble checkpoints
b801ed9 verified
{
"ensemble_manifest_path": "output/ai/slab_hybrid_enhanced_ensemble/ensemble_manifest.json",
"dataset_directory": "output/ai/slab_hybrid_dataset_benchmark",
"model_family": "hybrid",
"model_label": "Model C",
"target_columns": [
"benchmark_normalized_transmission",
"benchmark_normalized_reflection",
"normalized_peak_intensity"
],
"evaluated_splits": [
{
"label": "val",
"num_samples": 18,
"metrics_file": "ensemble_metrics_val.json",
"aggregate_predictions_file": "aggregate_predictions_val.csv",
"member_predictions_file": "member_predictions_val.csv",
"scalar_metrics": {
"overall": {
"mean_mae": 0.11385147245948606,
"mean_rmse": 0.16200101646216186,
"mean_relative_error": 1.0974979104909515
},
"per_target": {
"benchmark_normalized_transmission": {
"mae": 0.07016787163072785,
"rmse": 0.10432158162628831,
"mean_relative_error": 0.0957483947158443
},
"benchmark_normalized_reflection": {
"mae": 0.06792800511245026,
"rmse": 0.10331396236600238,
"mean_relative_error": 3.024337816882757
},
"normalized_peak_intensity": {
"mae": 0.20345854063528002,
"rmse": 0.2783675053941949,
"mean_relative_error": 0.17240751987425307
}
}
},
"uncertainty": {
"per_target": {
"benchmark_normalized_transmission": {
"mean_predictive_std": 0.011360962856081559,
"max_predictive_std": 0.020654394452047805,
"uncertainty_error_correlation": 0.39204238087051924
},
"benchmark_normalized_reflection": {
"mean_predictive_std": 0.012925432852674982,
"max_predictive_std": 0.021405327301865437,
"uncertainty_error_correlation": 0.28746703037274
},
"normalized_peak_intensity": {
"mean_predictive_std": 0.02190956045721411,
"max_predictive_std": 0.0336832882103833,
"uncertainty_error_correlation": 0.14797429903904363
}
},
"overall_mean_predictive_std": 0.01539865205532355,
"overall_uncertainty_error_correlation": 0.2758279034274343
},
"field_metrics": {
"mean_complex_mae": 0.000721128600519949,
"mean_complex_rmse": 0.0008146381402041088,
"mean_intensity_mae": 1.340545608315928e-06,
"mean_intensity_rmse": 1.754440612148272e-06
},
"residual": {
"sample_mean": [
0.021960339017428502,
0.02013098133725155,
0.01828312842747995,
0.017190707527264728,
0.017301100311485264,
0.01838773073667845,
0.04026259414597584,
0.0379905514197858,
0.03499547333604366,
0.03176194109071443,
0.028105838859120318,
0.025007485757205766,
0.02673026021781608,
0.02487619060433287,
0.022882465377153393,
0.021181345226089738,
0.01986505034686897,
0.01937766234800397
],
"sample_max": [
0.04216378405016335,
0.03850025166257747,
0.036845444721246005,
0.035386269717512575,
0.03609167443994837,
0.035790441329578176,
0.16843572313422048,
0.15191189780077552,
0.1370267713733422,
0.12924598248148275,
0.11987766897658741,
0.10133781465060013,
0.07097951012064485,
0.06726651958858765,
0.06623537525454741,
0.062078358540253245,
0.0599625142583727,
0.05019204548902663
],
"global_mean": 0.024793935893705515,
"global_max": 0.16843572313422048
},
"boundary": {
"predicted_sample_mean": [
0.053286029712046434,
0.01493505237043205,
0.0055900197177435485,
0.010108961450473818,
0.015356260224122191,
0.014097947797856301,
0.05907622795572509,
0.00984853780795986,
0.004025617248456489,
0.010581138570006763,
0.01739777359950806,
0.023149915601654445,
0.033092183181977146,
0.006672381692163019,
0.00748599561301617,
0.017473609233720485,
0.018751495446002846,
0.019347931732146544
],
"predicted_sample_max": [
0.05934742937525734,
0.019882576890718446,
0.007527008501604364,
0.011482811360450965,
0.01676645084750481,
0.015264075663181895,
0.0672276086501808,
0.014355306903767701,
0.005044552051534398,
0.012902436109991328,
0.019455641389099665,
0.025760085147803615,
0.038310850194344485,
0.00980513951985764,
0.008875789936434779,
0.019288599012422768,
0.020648035666814363,
0.020357615376506658
],
"predicted_global_mean": 0.018904282164167292,
"predicted_global_max": 0.0672276086501808,
"reference_sample_mean": [
0.06196533212447744,
0.07414241698403592,
0.08646401943370771,
0.09875377580011292,
0.11087360054830242,
0.12272086310187592,
0.058625553082027104,
0.07184065807899906,
0.08522100673835066,
0.09795360041428039,
0.11031711058000586,
0.12233433210338872,
0.0618324566172619,
0.07405542242988561,
0.08639943104366392,
0.09870041288804479,
0.11082656321181317,
0.12267819154020163
],
"reference_sample_max": [
0.06411456725870579,
0.07646451512230698,
0.08884649402792684,
0.1011078840821186,
0.11313288659838022,
0.12483819593456884,
0.060981775023704596,
0.07427944314374434,
0.08767871508664947,
0.10037275209999884,
0.11263328893250345,
0.12449921980906546,
0.06397470822350064,
0.0763752357128823,
0.08878267139877646,
0.10105714391664282,
0.11308950108929428,
0.12479964458686518
],
"reference_global_mean": 0.09198359704002416,
"reference_global_max": 0.12483819593456884
},
"source": {
"predicted_sample_mean": [
0.014415862584684205,
0.01856834161154681,
0.02344346288055675,
0.02890906791481227,
0.03593074495828412,
0.04255444826907865,
0.024886124292793098,
0.029047115121403004,
0.03482757673710617,
0.04437069465820982,
0.05470813870294458,
0.0598188335687294,
0.021410074277844738,
0.025683283674658098,
0.02932374750441966,
0.03596124723854324,
0.04400679469464972,
0.05207618418017881
],
"predicted_sample_max": [
0.03644436010267449,
0.044562567472349685,
0.059070791329785696,
0.07330228502680551,
0.09066226121384864,
0.10684205109445014,
0.14398668519312738,
0.10981135772962523,
0.0977024798082052,
0.10451903167962627,
0.13407475231537747,
0.14329542106776183,
0.0483958154605802,
0.05891685970830821,
0.06962170277774041,
0.08970267633379969,
0.10886171569281433,
0.12370137134992779
],
"predicted_global_mean": 0.034441207937246844,
"predicted_global_max": 0.14398668519312738,
"reference_sample_mean": [
0.0219210348019843,
0.024233269283296416,
0.026033528603793574,
0.02843695754963592,
0.032764693608280375,
0.038499612772304216,
0.040833742371298005,
0.03994490937784413,
0.034994285628502694,
0.035057588083639436,
0.03655343026425834,
0.04012879381507622,
0.025725240154253745,
0.028423181262725528,
0.03187654378781592,
0.03626717372542824,
0.04225909184139334,
0.049518422497335526
],
"reference_sample_max": [
0.08563520097129106,
0.0948496356240846,
0.10703422670448437,
0.11499929840202906,
0.12167656099175754,
0.1339479290239561,
0.1416305084517339,
0.12327345687108168,
0.11864291355638759,
0.13324440822549877,
0.15640461263305983,
0.1715918999448759,
0.10935156957384358,
0.1203433045512512,
0.13544093206575203,
0.14494042048757472,
0.1516038799666998,
0.16390554917782496
],
"reference_global_mean": 0.034081749968270326,
"reference_global_max": 0.1715918999448759
}
},
{
"label": "test",
"num_samples": 18,
"metrics_file": "ensemble_metrics_test.json",
"aggregate_predictions_file": "aggregate_predictions_test.csv",
"member_predictions_file": "member_predictions_test.csv",
"scalar_metrics": {
"overall": {
"mean_mae": 0.1154801346673142,
"mean_rmse": 0.14410019463213125,
"mean_relative_error": 0.42411101062901874
},
"per_target": {
"benchmark_normalized_transmission": {
"mae": 0.05548032356816047,
"rmse": 0.07619552746597794,
"mean_relative_error": 0.0689904809952569
},
"benchmark_normalized_reflection": {
"mae": 0.05391017972659699,
"rmse": 0.07490675936418294,
"mean_relative_error": 0.9925814419963294
},
"normalized_peak_intensity": {
"mae": 0.23704990070718512,
"rmse": 0.2811982970662329,
"mean_relative_error": 0.21076110889546987
}
}
},
"uncertainty": {
"per_target": {
"benchmark_normalized_transmission": {
"mean_predictive_std": 0.009513890491645263,
"max_predictive_std": 0.01772107602815368,
"uncertainty_error_correlation": -0.34694362977066173
},
"benchmark_normalized_reflection": {
"mean_predictive_std": 0.0104560073445761,
"max_predictive_std": 0.020682545743072333,
"uncertainty_error_correlation": -0.45577046161542334
},
"normalized_peak_intensity": {
"mean_predictive_std": 0.018602482360050267,
"max_predictive_std": 0.028309676543455432,
"uncertainty_error_correlation": -0.07110593835471898
}
},
"overall_mean_predictive_std": 0.012857460065423876,
"overall_uncertainty_error_correlation": -0.2912733432469347
},
"field_metrics": {
"mean_complex_mae": 0.0006997790687022835,
"mean_complex_rmse": 0.0007749699415289967,
"mean_intensity_mae": 1.317969672160672e-06,
"mean_intensity_rmse": 1.6935595396035523e-06
},
"residual": {
"sample_mean": [
0.03934727377227594,
0.03565808493629827,
0.032141392528292796,
0.028875737283629927,
0.025983561072838317,
0.023998416417008436,
0.029226011348961755,
0.026685392056567732,
0.02448237993930488,
0.02240492626451875,
0.021305272166613116,
0.02092777942446199,
0.026172386438518734,
0.024412747386603963,
0.022476200638592677,
0.020297885740817728,
0.01939114639226242,
0.01921126337304517
],
"sample_max": [
0.12635528698825402,
0.1136870174766674,
0.10233210468796367,
0.09418780816189636,
0.09277253467100834,
0.07898375580143513,
0.06998862981479632,
0.06670220792847721,
0.0635276391185121,
0.057712457443394816,
0.05856636295740496,
0.05536435787586984,
0.0637352197093811,
0.06022616871751451,
0.059735366397589446,
0.057761892556683625,
0.05426473017451077,
0.05075201021684029
],
"global_mean": 0.0257221031767007,
"global_max": 0.12635528698825402
},
"boundary": {
"predicted_sample_mean": [
0.057763210712826585,
0.01318621595586357,
0.005777849870732473,
0.013907715226925993,
0.01899713289439706,
0.0248395819546614,
0.05381048551558043,
0.011009059143074217,
0.005383813982702787,
0.0141751215075233,
0.01705115941193596,
0.017372267641204097,
0.03755103367009932,
0.008630842452506405,
0.006788826252022371,
0.014479537523082153,
0.01834896796790188,
0.019026097833012692
],
"predicted_sample_max": [
0.06493499311569094,
0.017909329861919735,
0.006650553643520921,
0.016742345772848066,
0.021226807332621625,
0.026100309182576085,
0.05947578589343746,
0.015138159433806964,
0.006736311920064763,
0.015607078579479707,
0.018600794063341246,
0.018478508921771674,
0.04281535275665709,
0.012113098020417318,
0.00810289028958665,
0.016174353817482535,
0.02043322800107991,
0.020211668303301216
],
"predicted_global_mean": 0.019894384417558482,
"predicted_global_max": 0.06493499311569094,
"reference_sample_mean": [
0.05984716361430416,
0.07283055280690998,
0.08555950792456093,
0.09809948661998483,
0.11040256213281559,
0.12239589894441909,
0.06141156710925102,
0.0737217787700748,
0.08614673499855063,
0.09852047606293887,
0.11071016121454105,
0.1226159697760649,
0.06181795487649191,
0.07403764029143277,
0.08638409241434832,
0.09868965094907936,
0.11082077268189455,
0.12267699938697817
],
"reference_sample_max": [
0.06207037802445194,
0.0752177335599683,
0.08800639731580373,
0.1005137257747916,
0.11271428101019834,
0.12455619668729201,
0.06358915024504505,
0.07607521278050026,
0.08855926067726827,
0.10090104973501916,
0.11299127344395199,
0.12475019586258176,
0.06396427805211795,
0.07636096041773913,
0.08877009117594971,
0.10104838887433706,
0.11308503353132762,
0.12479918488938653
],
"reference_global_mean": 0.0920382761430356,
"reference_global_max": 0.12479918488938653
},
"source": {
"predicted_sample_mean": [
0.02387797202213969,
0.028522446434377593,
0.034460858918825,
0.042693632748454745,
0.04892017026330278,
0.054553153240116746,
0.01717290495850044,
0.02159482396521286,
0.02637831762148539,
0.03199043283480854,
0.03898441410682901,
0.04595211506175123,
0.019233912199892875,
0.023288694224070587,
0.027452937327749137,
0.03331230009553306,
0.041329079892866244,
0.04937691526955285
],
"predicted_sample_max": [
0.08159491458607,
0.06254019105221116,
0.07800855532567558,
0.1013062047336392,
0.1162378291157879,
0.12872179002380627,
0.040122936268798405,
0.05002649055134065,
0.06135619135218837,
0.07956368375790836,
0.09908835306221622,
0.11436928300702207,
0.044224084313143175,
0.05573838806053304,
0.06539120654918422,
0.08292742600224494,
0.10322310815779838,
0.11988048070825105
],
"predicted_global_mean": 0.033838615621414936,
"predicted_global_max": 0.12872179002380627,
"reference_sample_mean": [
0.03183017767722675,
0.030952190143273856,
0.03216173755534499,
0.03407919572391057,
0.035315651856240075,
0.03750996778683249,
0.023194470353122026,
0.025598999357961758,
0.027694522977352067,
0.029178901961207555,
0.03183655164906345,
0.036996705305167805,
0.024514000676014863,
0.026968568769134994,
0.029807264458894485,
0.03348748383619473,
0.03895101196451102,
0.04586693012795556
],
"reference_sample_max": [
0.11159714034432743,
0.10545746528570256,
0.10814214767969627,
0.12381931655764976,
0.14417866860291115,
0.1581091931721042,
0.08941198440434402,
0.09318425256850335,
0.1063597336271905,
0.12064803296874926,
0.1293359524641294,
0.13776460731623427,
0.10126047828138461,
0.11090669388322566,
0.12590311278895192,
0.1361486227465539,
0.1430258299925854,
0.154884447629581
],
"reference_global_mean": 0.031996907343300504,
"reference_global_max": 0.1581091931721042
}
}
],
"ood_probe": {
"label": "ood_probe",
"num_samples": 12,
"metrics_file": "ensemble_metrics_ood_probe.json",
"aggregate_predictions_file": "aggregate_predictions_ood_probe.csv",
"member_predictions_file": "member_predictions_ood_probe.csv",
"scalar_metrics": {
"overall": {
"mean_mae": 0.12926783236339037,
"mean_rmse": 0.14978970910915532,
"mean_relative_error": 0.6607700524779573
},
"per_target": {
"benchmark_normalized_transmission": {
"mae": 0.10747572146765737,
"rmse": 0.1256018652223858,
"mean_relative_error": 0.12674489185957752
},
"benchmark_normalized_reflection": {
"mae": 0.10584133202889677,
"rmse": 0.12381899006948503,
"mean_relative_error": 1.6951671382736864
},
"normalized_peak_intensity": {
"mae": 0.17448644359361698,
"rmse": 0.19994827203559512,
"mean_relative_error": 0.1603981273006078
}
}
},
"uncertainty": {
"per_target": {
"benchmark_normalized_transmission": {
"mean_predictive_std": 0.016544679411819307,
"max_predictive_std": 0.024901564503980626,
"uncertainty_error_correlation": 0.3341292551879384
},
"benchmark_normalized_reflection": {
"mean_predictive_std": 0.020702959255666786,
"max_predictive_std": 0.025442199922928954,
"uncertainty_error_correlation": 0.2634510452474256
},
"normalized_peak_intensity": {
"mean_predictive_std": 0.04026030501502406,
"max_predictive_std": 0.04734345816448756,
"uncertainty_error_correlation": 0.139767364914948
}
},
"overall_mean_predictive_std": 0.025835981227503385,
"overall_uncertainty_error_correlation": 0.24578255511677063
},
"field_metrics": {
"mean_complex_mae": 0.001459347172678947,
"mean_complex_rmse": 0.00160882242284233,
"mean_intensity_mae": 1.8982307323086886e-06,
"mean_intensity_rmse": 2.4080911654780386e-06
},
"residual": {
"sample_mean": [
0.10103054766741522,
0.048921082968568036,
0.08877659916902451,
0.042407039721472965,
0.09871519329479349,
0.047321057114761846,
0.0836025975076337,
0.0402130070307203,
0.08730416651175572,
0.041752166923362984,
0.092630679476968,
0.04432446680987513
],
"sample_max": [
0.29532924957250234,
0.14236435456615903,
0.29356482693785385,
0.13651505773480577,
0.3104457386347516,
0.1432598512737574,
0.258320570225185,
0.12808582018863263,
0.2703646268393658,
0.1324415036642705,
0.29758166326774543,
0.13731693128298247
],
"global_mean": 0.06808321701636266,
"global_max": 0.3104457386347516
},
"boundary": {
"predicted_sample_mean": [
0.17878799411684504,
0.010784274759520335,
0.16702608281980824,
0.012129325906298376,
0.1761642417564619,
0.010677556598310222,
0.1631697340617843,
0.013294544159604186,
0.16355178347609384,
0.013159004319378527,
0.1710958679032068,
0.011448344665728848
],
"predicted_sample_max": [
0.18660074220690578,
0.011279499607524736,
0.174014824485308,
0.012294085278509628,
0.18396031180162675,
0.010980896504656647,
0.17034065382432673,
0.014065627962754366,
0.1705969510935001,
0.01343034208337832,
0.17871592094172967,
0.011884948940375005
],
"predicted_global_mean": 0.09094072954525338,
"predicted_global_max": 0.18660074220690578,
"reference_sample_mean": [
0.04728501905140998,
0.13354968261659655,
0.04288683058177921,
0.1333458789149164,
0.04601016298247958,
0.13337610386276744,
0.040376938879626734,
0.1335513141188486,
0.042265308854214576,
0.1334803999281832,
0.04418773613097318,
0.1333750969681429
],
"reference_sample_max": [
0.04903323007868271,
0.13562679018498783,
0.04515572475684559,
0.1354174854707307,
0.04791710710558449,
0.13545679119529408,
0.04283081231269076,
0.135607761186501,
0.044544973925903994,
0.1355442623873557,
0.046318640319296216,
0.13544955997378166
],
"reference_global_mean": 0.0886408727408282,
"reference_global_max": 0.13562679018498783
},
"source": {
"predicted_sample_mean": [
0.012845966771015172,
0.030928957843374277,
0.013450062249038117,
0.03755293866995806,
0.012907912279035611,
0.03383998081876091,
0.01398019960502075,
0.036784170139829174,
0.013980725402784291,
0.036881549000509196,
0.013206971261081774,
0.03579836439937122
],
"predicted_sample_max": [
0.06434441273495276,
0.07983974216015684,
0.09368303363332478,
0.09351252153871327,
0.08245686818820101,
0.08617514456846288,
0.06903414549849503,
0.09192705564888418,
0.08182785675693259,
0.09119826535901515,
0.08437192378363338,
0.08981861553194549
],
"predicted_global_mean": 0.024346483203314877,
"predicted_global_max": 0.09368303363332478,
"reference_sample_mean": [
0.020517204490226326,
0.03023246311055182,
0.015706950896895,
0.03426315549302617,
0.016735349087180933,
0.029789587835670248,
0.02456539040384174,
0.03349140184442017,
0.015451669918901715,
0.03207138070324892,
0.012085251207256766,
0.03060566461422841
],
"reference_sample_max": [
0.06287510858448725,
0.12480873625142393,
0.0968637461852922,
0.12785750752535727,
0.06115714144114798,
0.1271924219123752,
0.17381385035207986,
0.12379080500731166,
0.09748034773569801,
0.1234998686811949,
0.05847971470290632,
0.12510888900988465
],
"reference_global_mean": 0.024626289133787352,
"reference_global_max": 0.17381385035207986
}
}
}