cowWhySo committed on
Commit
a4b643d
·
verified ·
1 Parent(s): 80593d1

Add final-response verifier checkpoint

Browse files
hf_model/artifact_manifest.json CHANGED
@@ -15,7 +15,9 @@
15
  "eval_batch_size": 32,
16
  "grad_accum": 4,
17
  "max_per_label": 5000,
18
- "force_retrain": false,
 
 
19
  "export_cpu_only": true
20
  },
21
  "gpu_info": {
@@ -49,5 +51,5 @@
49
  ],
50
  "deployment_default": "shadow",
51
  "shadow_first_reason": "experimental final-response verifier; promote only after eval replay",
52
- "created_unix": 1780095207
53
  }
 
15
  "eval_batch_size": 32,
16
  "grad_accum": 4,
17
  "max_per_label": 5000,
18
+ "balance_labels": true,
19
+ "min_per_label": 64,
20
+ "force_retrain": true,
21
  "export_cpu_only": true
22
  },
23
  "gpu_info": {
 
51
  ],
52
  "deployment_default": "shadow",
53
  "shadow_first_reason": "experimental final-response verifier; promote only after eval replay",
54
+ "created_unix": 1780181972
55
  }
hf_model/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8c89842b5aba73baf40ed2feba491dc2f943a29607b1cd7c2283b3223da02e3
3
  size 567607780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb0398c7150936e081bbe64568c55846b57eaa9af6d3a31551d0c878ca33d171
3
  size 567607780
hf_model/onnx_parity_report.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "schema_version": "final-response-verifier-onnx-parity/v1",
3
- "rows": 14,
4
  "pt_fp32_top_label_agreement": 1.0,
5
- "pt_fp32_max_abs_diff": 2.4586915969848633e-07,
6
  "quantized_present": true,
7
  "fp32_quantized_top_label_agreement": 1.0,
8
  "fp32_quantized_disagreements": 0,
9
- "fp32_quantized_max_abs_diff": 0.01770871877670288
10
  }
 
1
  {
2
  "schema_version": "final-response-verifier-onnx-parity/v1",
3
+ "rows": 33,
4
  "pt_fp32_top_label_agreement": 1.0,
5
+ "pt_fp32_max_abs_diff": 4.470348358154297e-07,
6
  "quantized_present": true,
7
  "fp32_quantized_top_label_agreement": 1.0,
8
  "fp32_quantized_disagreements": 0,
9
+ "fp32_quantized_max_abs_diff": 0.03157895430922508
10
  }
hf_model/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3aa09ad6bcd2b2645c3222fbfd7f683982407dbb27cefb40e1903787f71d0a2
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:906f6075ba75e9681408fd0cfbb76d1dbc4c174c8391db6fb133a8ba8ba64706
3
  size 5905
hf_model/training_provenance.json CHANGED
@@ -18,41 +18,43 @@
18
  "eval_batch_size": 32,
19
  "grad_accum": 4,
20
  "max_per_label": 5000,
21
- "force_retrain": false,
 
 
22
  "export_cpu_only": true
23
  },
24
- "rows": 128,
25
- "train_rows": 97,
26
- "validation_rows": 17,
27
- "test_rows": 14,
28
  "label_counts": {
29
- "valid_final_response": 37,
30
- "contradicts_tool_result": 37,
31
- "unsupported_claim": 18,
32
- "missing_tool_fact": 18,
33
- "failed_to_acknowledge_data_gap": 18
34
  },
35
  "resumed_from_checkpoint": false,
36
  "train_metrics": {
37
- "train_runtime": 10.306,
38
- "train_samples_per_second": 47.06,
39
- "train_steps_per_second": 0.97,
40
- "total_flos": 39847260684000.0,
41
- "train_loss": 1.6460792223612468,
42
  "epoch": 3.0
43
  },
44
  "test_metrics": {
45
- "eval_loss": 1.6308313608169556,
46
- "eval_accuracy": 0.14285714285714285,
47
- "eval_macro_precision": 0.02857142857142857,
48
  "eval_macro_recall": 0.2,
49
- "eval_macro_f1": 0.05,
50
- "eval_macro_precision_all_labels": 0.02857142857142857,
51
  "eval_macro_recall_all_labels": 0.2,
52
- "eval_macro_f1_all_labels": 0.05,
53
- "eval_runtime": 0.4413,
54
- "eval_samples_per_second": 31.727,
55
- "eval_steps_per_second": 2.266,
56
  "epoch": 3.0
57
  }
58
  }
 
18
  "eval_batch_size": 32,
19
  "grad_accum": 4,
20
  "max_per_label": 5000,
21
+ "balance_labels": true,
22
+ "min_per_label": 64,
23
+ "force_retrain": true,
24
  "export_cpu_only": true
25
  },
26
+ "rows": 378,
27
+ "train_rows": 312,
28
+ "validation_rows": 33,
29
+ "test_rows": 33,
30
  "label_counts": {
31
+ "valid_final_response": 122,
32
+ "unsupported_claim": 64,
33
+ "failed_to_acknowledge_data_gap": 64,
34
+ "missing_tool_fact": 64,
35
+ "contradicts_tool_result": 64
36
  },
37
  "resumed_from_checkpoint": false,
38
  "train_metrics": {
39
+ "train_runtime": 11.0074,
40
+ "train_samples_per_second": 141.723,
41
+ "train_steps_per_second": 2.271,
42
+ "total_flos": 118144540443600.0,
43
+ "train_loss": 1.6387429555257162,
44
  "epoch": 3.0
45
  },
46
  "test_metrics": {
47
+ "eval_loss": 1.729296088218689,
48
+ "eval_accuracy": 0.09090909090909091,
49
+ "eval_macro_precision": 0.01818181818181818,
50
  "eval_macro_recall": 0.2,
51
+ "eval_macro_f1": 0.03333333333333333,
52
+ "eval_macro_precision_all_labels": 0.01818181818181818,
53
  "eval_macro_recall_all_labels": 0.2,
54
+ "eval_macro_f1_all_labels": 0.03333333333333333,
55
+ "eval_runtime": 0.5187,
56
+ "eval_samples_per_second": 63.619,
57
+ "eval_steps_per_second": 3.856,
58
  "epoch": 3.0
59
  }
60
  }