Nexa_Mat2 / eval /controller_smoke /all_summary.json
Allanatrix's picture
Publish NexaMat rollout stack manifests and examples
c445e73 verified
{
"alignment_gates": 3,
"alignment_score": 0.4315514275885793,
"candidate_decision_accuracy": 0.16666666666666666,
"candidates": 6,
"controller_alignment_promotable": false,
"criteria": {
"alignment_ready_for_fft": true,
"bad_candidate_false_positive_max": 0.0,
"min_frontier_preservation": 1.0,
"min_known_deprioritization": 1.0,
"uncontained_hallucination_max": 0.0
},
"false_positive_rate_on_bad_candidates": 0.0,
"frontier_preservation_rate": 1.0,
"gate_checks": {
"bad_candidate_false_positive": true,
"frontier_preservation": true,
"known_deprioritization": false,
"ready_for_fft": false,
"uncontained_hallucination": false
},
"known_deprioritization_rate": 0.0,
"prediction_generators": [
"AethronPhantom/Nexa_Mat2/multimodal/controller/nexa_mat_controller_fft_pilot_20260518T234148Z/final_model_merged"
],
"ready_for_fft": false,
"records": 6,
"report": "/dev/shm/nexa_mat_controller_rollout_all_eval.md",
"structured_output_validity": 0.16666666666666666,
"uncontained_hallucination_rate": 1.0
}