physics-llm-paper / eval_data /conservation.json
AlexWortega's picture
fix: reviewer-critique fixes, in-distribution conservation eval, collision table PLM estimates
cca29c8 verified
{
"description": "In-distribution billiards (gravity on). px_err: |\u03a3m\u00b7pred_vx - \u03a3m\u00b7gt_vx| / max(|\u03a3m\u00b7vx_0|, 1.0). ke_err: |KE_pred - KE_gt| / KE_gt on free-flight frames only.",
"px_err_curve": [
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75,
0.75000001566302,
0.75000001566302,
0.75000001566302,
0.75000001566302,
0.75000001566302,
0.7500000313260401,
0.7500000145662762,
0.7500000341022239,
0.7500000184392039,
0.7499999956235059,
0.750000002776184,
0.750000002776184,
0.7500000005674178,
0.7500000005674178,
0.7500000005674178,
0.7500000227703068,
0.7500000005674178,
0.7499999561616398,
0.7499999783645288,
0.7500000162304378,
0.7500000162304378,
0.7500000162304378,
0.7500000005674178,
0.7500000317349407,
0.7500000539378296,
0.7500000761407186,
0.7500000539378296
],
"px_err_std_curve": [
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.4330127018922193,
0.43301271093527005,
0.4330127109352701,
0.43301271093527005,
0.43301271093527005,
0.43301271093527005,
0.4330127199783245,
0.43301271030207417,
0.43301272158116094,
0.4330127125381066,
0.43301269936546244,
0.43301270349505605,
0.43301270349505605,
0.43301270221983296,
0.43301270221983296,
0.433012702219833,
0.43301271503868066,
0.43301270221983296,
0.4330126765821604,
0.4330126894009929,
0.43301271126288365,
0.43301271126288365,
0.43301271126288365,
0.43301270221983296,
0.4330126482361042,
0.43301266105495184,
0.43301267387380704,
0.4330126610549518
],
"mean_ke_err_free_flight": 0.7418824476788403,
"std_ke_err_free_flight": 0.420238821501294
}