cowWhySo committed on
Commit
80593d1
·
verified ·
1 Parent(s): 6567725

Add final-response verifier ONNX artifacts

Browse files
onnx/artifact_manifest.json CHANGED
@@ -49,5 +49,5 @@
49
  ],
50
  "deployment_default": "shadow",
51
  "shadow_first_reason": "experimental final-response verifier; promote only after eval replay",
52
- "created_unix": 1779885930
53
  }
 
49
  ],
50
  "deployment_default": "shadow",
51
  "shadow_first_reason": "experimental final-response verifier; promote only after eval replay",
52
+ "created_unix": 1780095207
53
  }
onnx/model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:186472a7a434ecaba20f1900d9b8487d077fe370d237c2a9e2a5dd950cbf99ab
3
  size 568055401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3849500df5f74f1da797de9bf0c62231639e6d494d2f1ebbbd37b1423dff3adb
3
  size 568055401
onnx/model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20cd2f7cc417304e8a1a872367d412d1de1e8ed36e4d5761a44cf564c36ce60b
3
  size 172267901
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e6b6ab9ac6c268e164d7bcb04c2bb9d0bd91c4a2b798e7ece0d97a7fc9c01c6
3
  size 172267901
onnx/onnx_parity_report.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "schema_version": "final-response-verifier-onnx-parity/v1",
3
- "rows": 10,
4
  "pt_fp32_top_label_agreement": 1.0,
5
- "pt_fp32_max_abs_diff": 3.5762786865234375e-07,
6
  "quantized_present": true,
7
  "fp32_quantized_top_label_agreement": 1.0,
8
  "fp32_quantized_disagreements": 0,
9
- "fp32_quantized_max_abs_diff": 0.021983787417411804
10
  }
 
1
  {
2
  "schema_version": "final-response-verifier-onnx-parity/v1",
3
+ "rows": 14,
4
  "pt_fp32_top_label_agreement": 1.0,
5
+ "pt_fp32_max_abs_diff": 2.4586915969848633e-07,
6
  "quantized_present": true,
7
  "fp32_quantized_top_label_agreement": 1.0,
8
  "fp32_quantized_disagreements": 0,
9
+ "fp32_quantized_max_abs_diff": 0.01770871877670288
10
  }
onnx/training_provenance.json CHANGED
@@ -21,38 +21,38 @@
21
  "force_retrain": false,
22
  "export_cpu_only": true
23
  },
24
- "rows": 90,
25
- "train_rows": 70,
26
- "validation_rows": 10,
27
- "test_rows": 10,
28
  "label_counts": {
29
- "contradicts_tool_result": 18,
30
- "missing_tool_fact": 18,
31
  "unsupported_claim": 18,
32
- "valid_final_response": 18,
33
  "failed_to_acknowledge_data_gap": 18
34
  },
35
  "resumed_from_checkpoint": false,
36
  "train_metrics": {
37
- "train_runtime": 14.0866,
38
- "train_samples_per_second": 24.846,
39
- "train_steps_per_second": 0.71,
40
- "total_flos": 16883336101500.0,
41
- "train_loss": 1.620902379353841,
42
  "epoch": 3.0
43
  },
44
  "test_metrics": {
45
- "eval_loss": 1.6238057613372803,
46
- "eval_accuracy": 0.2,
47
- "eval_macro_precision": 0.04,
48
  "eval_macro_recall": 0.2,
49
- "eval_macro_f1": 0.06666666666666667,
50
- "eval_macro_precision_all_labels": 0.04,
51
  "eval_macro_recall_all_labels": 0.2,
52
- "eval_macro_f1_all_labels": 0.06666666666666667,
53
- "eval_runtime": 0.4888,
54
- "eval_samples_per_second": 20.46,
55
- "eval_steps_per_second": 2.046,
56
  "epoch": 3.0
57
  }
58
  }
 
21
  "force_retrain": false,
22
  "export_cpu_only": true
23
  },
24
+ "rows": 128,
25
+ "train_rows": 97,
26
+ "validation_rows": 17,
27
+ "test_rows": 14,
28
  "label_counts": {
29
+ "valid_final_response": 37,
30
+ "contradicts_tool_result": 37,
31
  "unsupported_claim": 18,
32
+ "missing_tool_fact": 18,
33
  "failed_to_acknowledge_data_gap": 18
34
  },
35
  "resumed_from_checkpoint": false,
36
  "train_metrics": {
37
+ "train_runtime": 10.306,
38
+ "train_samples_per_second": 47.06,
39
+ "train_steps_per_second": 0.97,
40
+ "total_flos": 39847260684000.0,
41
+ "train_loss": 1.6460792223612468,
42
  "epoch": 3.0
43
  },
44
  "test_metrics": {
45
+ "eval_loss": 1.6308313608169556,
46
+ "eval_accuracy": 0.14285714285714285,
47
+ "eval_macro_precision": 0.02857142857142857,
48
  "eval_macro_recall": 0.2,
49
+ "eval_macro_f1": 0.05,
50
+ "eval_macro_precision_all_labels": 0.02857142857142857,
51
  "eval_macro_recall_all_labels": 0.2,
52
+ "eval_macro_f1_all_labels": 0.05,
53
+ "eval_runtime": 0.4413,
54
+ "eval_samples_per_second": 31.727,
55
+ "eval_steps_per_second": 2.266,
56
  "epoch": 3.0
57
  }
58
  }