Spaces:
Sleeping
Sleeping
| { | |
| "model_name_or_path": "shadowops_qwen3_1p7b_model", | |
| "checkpoint_path": "shadowops_qwen3_1p7b_model", | |
| "eval_split": "validation", | |
| "sample_count": 100, | |
| "dataset_audit": { | |
| "train_sample_count": 500, | |
| "val_sample_count": 100, | |
| "train_val_overlap_count": 0 | |
| }, | |
| "model_metrics": null, | |
| "q_aware_baseline": { | |
| "label": "q_aware", | |
| "sample_count": 100, | |
| "exact_match": 0.99, | |
| "safety_accuracy": 1.0, | |
| "valid_action_rate": 1.0, | |
| "invalid_action_rate": 0.0, | |
| "invalid_output_rate": 0.0, | |
| "parse_failure_rate": 0.0, | |
| "unsafe_decision_rate": 0.0, | |
| "false_positive_rate": 0.0, | |
| "false_negative_rate": 0.0, | |
| "reward_mean": 1.93683846, | |
| "reward_std": 0.33417104399464115, | |
| "allow_precision": 1.0, | |
| "block_precision": 1.0, | |
| "fork_precision": 1.0, | |
| "quarantine_precision": 0.9565217391304348, | |
| "per_action_accuracy": { | |
| "ALLOW": 1.0, | |
| "BLOCK": 1.0, | |
| "FORK": 0.9642857142857143, | |
| "QUARANTINE": 1.0 | |
| }, | |
| "confusion_matrix": { | |
| "ALLOW": { | |
| "ALLOW": 35, | |
| "BLOCK": 0, | |
| "FORK": 0, | |
| "QUARANTINE": 0, | |
| "INVALID": 0 | |
| }, | |
| "BLOCK": { | |
| "ALLOW": 0, | |
| "BLOCK": 15, | |
| "FORK": 0, | |
| "QUARANTINE": 0, | |
| "INVALID": 0 | |
| }, | |
| "FORK": { | |
| "ALLOW": 0, | |
| "BLOCK": 0, | |
| "FORK": 27, | |
| "QUARANTINE": 1, | |
| "INVALID": 0 | |
| }, | |
| "QUARANTINE": { | |
| "ALLOW": 0, | |
| "BLOCK": 0, | |
| "FORK": 0, | |
| "QUARANTINE": 22, | |
| "INVALID": 0 | |
| } | |
| }, | |
| "avg_completion_length": 1.0, | |
| "action_distribution": { | |
| "ALLOW": 0.35, | |
| "BLOCK": 0.15, | |
| "FORK": 0.27, | |
| "QUARANTINE": 0.23 | |
| }, | |
| "normalized_action_distribution": { | |
| "ALLOW": 0.35, | |
| "BLOCK": 0.15, | |
| "FORK": 0.27, | |
| "QUARANTINE": 0.23 | |
| }, | |
| "invalid_output_count": 0, | |
| "multi_action_warnings": 0, | |
| "multi_action_warning_rate": 0.0, | |
| "entropy": 1.9383346690254595, | |
| "reward_breakdown": { | |
| "exact_correct": 50, | |
| "exact_conservative": 49, | |
| "minor_wrong": 1 | |
| } | |
| }, | |
| "delta_vs_q_aware": null, | |
| "training_gate": { | |
| "training_gate_status": "FAIL", | |
| "training_gate_passed": false, | |
| "reason": "No model metrics are available; checkpoint was not loaded or evaluation failed.", | |
| "recommended_next_action": "Run --evaluate-model with a valid --model-path after SFT/GRPO smoke training." | |
| }, | |
| "training_gate_status": "FAIL", | |
| "training_gate_passed": false, | |
| "reason": "No model metrics are available; checkpoint was not loaded or evaluation failed.", | |
| "model_load_error": "Model stack unavailable. Check torch, datasets, transformers, trl, unsloth, CUDA, and checkpoint path." | |
| } |