shadowops-hackathon / backend-ml /training /model_eval_report.json
ShadowOps Deploy
Final deploy: Monolithic ShadowOps app + Training Scripts
d064478
{
"model_name_or_path": "shadowops_qwen3_1p7b_model",
"checkpoint_path": "shadowops_qwen3_1p7b_model",
"eval_split": "validation",
"sample_count": 100,
"dataset_audit": {
"train_sample_count": 500,
"val_sample_count": 100,
"train_val_overlap_count": 0
},
"model_metrics": null,
"q_aware_baseline": {
"label": "q_aware",
"sample_count": 100,
"exact_match": 0.99,
"safety_accuracy": 1.0,
"valid_action_rate": 1.0,
"invalid_action_rate": 0.0,
"invalid_output_rate": 0.0,
"parse_failure_rate": 0.0,
"unsafe_decision_rate": 0.0,
"false_positive_rate": 0.0,
"false_negative_rate": 0.0,
"reward_mean": 1.93683846,
"reward_std": 0.33417104399464115,
"allow_precision": 1.0,
"block_precision": 1.0,
"fork_precision": 1.0,
"quarantine_precision": 0.9565217391304348,
"per_action_accuracy": {
"ALLOW": 1.0,
"BLOCK": 1.0,
"FORK": 0.9642857142857143,
"QUARANTINE": 1.0
},
"confusion_matrix": {
"ALLOW": {
"ALLOW": 35,
"BLOCK": 0,
"FORK": 0,
"QUARANTINE": 0,
"INVALID": 0
},
"BLOCK": {
"ALLOW": 0,
"BLOCK": 15,
"FORK": 0,
"QUARANTINE": 0,
"INVALID": 0
},
"FORK": {
"ALLOW": 0,
"BLOCK": 0,
"FORK": 27,
"QUARANTINE": 1,
"INVALID": 0
},
"QUARANTINE": {
"ALLOW": 0,
"BLOCK": 0,
"FORK": 0,
"QUARANTINE": 22,
"INVALID": 0
}
},
"avg_completion_length": 1.0,
"action_distribution": {
"ALLOW": 0.35,
"BLOCK": 0.15,
"FORK": 0.27,
"QUARANTINE": 0.23
},
"normalized_action_distribution": {
"ALLOW": 0.35,
"BLOCK": 0.15,
"FORK": 0.27,
"QUARANTINE": 0.23
},
"invalid_output_count": 0,
"multi_action_warnings": 0,
"multi_action_warning_rate": 0.0,
"entropy": 1.9383346690254595,
"reward_breakdown": {
"exact_correct": 50,
"exact_conservative": 49,
"minor_wrong": 1
}
},
"delta_vs_q_aware": null,
"training_gate": {
"training_gate_status": "FAIL",
"training_gate_passed": false,
"reason": "No model metrics are available; checkpoint was not loaded or evaluation failed.",
"recommended_next_action": "Run --evaluate-model with a valid --model-path after SFT/GRPO smoke training."
},
"training_gate_status": "FAIL",
"training_gate_passed": false,
"reason": "No model metrics are available; checkpoint was not loaded or evaluation failed.",
"model_load_error": "Model stack unavailable. Check torch, datasets, transformers, trl, unsloth, CUDA, and checkpoint path."
}