cowWhySo committed on
Commit
d4fac1f
·
verified ·
1 Parent(s): d6d0a65

Add final-response verifier checkpoint

Browse files
hf_model/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
hf_model/artifact_manifest.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "artifact_schema_version": "final-response-verifier-artifact/v1",
3
+ "model_kind": "text-classification-cross-encoder",
4
+ "base_model": "microsoft/deberta-v3-small",
5
+ "label_mode": "production",
6
+ "input_schema_version": "final-response-verifier-input/v1",
7
+ "serializer": "serialize_final_response_state_v1",
8
+ "max_length": 768,
9
+ "requested_gpu_profile": "high_vram_quality",
10
+ "run_profile": "high_vram_quality",
11
+ "memory_profile": {
12
+ "max_length": 768,
13
+ "epochs": 5,
14
+ "train_batch_size": 16,
15
+ "eval_batch_size": 32,
16
+ "grad_accum": 4,
17
+ "max_per_label": 5000,
18
+ "force_retrain": false,
19
+ "export_cpu_only": true
20
+ },
21
+ "gpu_info": {
22
+ "available": true,
23
+ "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition",
24
+ "capability": [
25
+ 12,
26
+ 0
27
+ ],
28
+ "total_gb": 95.0
29
+ },
30
+ "precision_flags": {
31
+ "fp16": false,
32
+ "bf16": true,
33
+ "tf32": true
34
+ },
35
+ "onnx_file": "model.onnx",
36
+ "quantized_onnx_file": "model_quantized.onnx",
37
+ "labels": [
38
+ "valid_final_response",
39
+ "missing_tool_fact",
40
+ "contradicts_tool_result",
41
+ "unsupported_claim",
42
+ "failed_to_acknowledge_data_gap"
43
+ ],
44
+ "deployment_default": "shadow",
45
+ "shadow_first_reason": "experimental final-response verifier; promote only after eval replay",
46
+ "created_unix": 1779844717
47
+ }
hf_model/config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 1,
7
+ "dtype": "float32",
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "valid_final_response",
14
+ "1": "missing_tool_fact",
15
+ "2": "contradicts_tool_result",
16
+ "3": "unsupported_claim",
17
+ "4": "failed_to_acknowledge_data_gap"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "contradicts_tool_result": 2,
23
+ "failed_to_acknowledge_data_gap": 4,
24
+ "missing_tool_fact": 1,
25
+ "unsupported_claim": 3,
26
+ "valid_final_response": 0
27
+ },
28
+ "layer_norm_eps": 1e-07,
29
+ "legacy": true,
30
+ "max_position_embeddings": 512,
31
+ "max_relative_positions": -1,
32
+ "model_type": "deberta-v2",
33
+ "norm_rel_ebd": "layer_norm",
34
+ "num_attention_heads": 12,
35
+ "num_hidden_layers": 6,
36
+ "pad_token_id": 0,
37
+ "pooler_dropout": 0,
38
+ "pooler_hidden_act": "gelu",
39
+ "pooler_hidden_size": 768,
40
+ "pos_att_type": [
41
+ "p2c",
42
+ "c2p"
43
+ ],
44
+ "position_biased_input": false,
45
+ "position_buckets": 256,
46
+ "relative_attention": true,
47
+ "share_att_key": true,
48
+ "transformers_version": "4.57.6",
49
+ "type_vocab_size": 0,
50
+ "vocab_size": 128100
51
+ }
hf_model/input_schema.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$id": "final-response-verifier-input/v1",
3
+ "type": "object",
4
+ "required": [
5
+ "schema_version",
6
+ "user_request",
7
+ "workflow_state",
8
+ "required_facts",
9
+ "tool_trace",
10
+ "tool_results",
11
+ "candidate_final_response"
12
+ ],
13
+ "properties": {
14
+ "schema_version": {
15
+ "const": "final-response-verifier-input/v1"
16
+ },
17
+ "user_request": {
18
+ "type": "string"
19
+ },
20
+ "workflow_state": {
21
+ "type": "object",
22
+ "required": [
23
+ "required_steps",
24
+ "completed_steps",
25
+ "pending_steps",
26
+ "terminal_tools"
27
+ ],
28
+ "properties": {
29
+ "required_steps": {
30
+ "type": "array",
31
+ "items": {
32
+ "type": "string"
33
+ }
34
+ },
35
+ "completed_steps": {
36
+ "type": "array",
37
+ "items": {
38
+ "type": "string"
39
+ }
40
+ },
41
+ "pending_steps": {
42
+ "type": "array",
43
+ "items": {
44
+ "type": "string"
45
+ }
46
+ },
47
+ "terminal_tools": {
48
+ "type": "array",
49
+ "items": {
50
+ "type": "string"
51
+ }
52
+ },
53
+ "recent_errors": {
54
+ "type": "array",
55
+ "items": {
56
+ "type": "string"
57
+ }
58
+ }
59
+ },
60
+ "additionalProperties": false
61
+ },
62
+ "required_facts": {
63
+ "type": "array",
64
+ "items": {
65
+ "type": "string"
66
+ }
67
+ },
68
+ "tool_trace": {
69
+ "type": "array",
70
+ "items": {
71
+ "type": "string"
72
+ }
73
+ },
74
+ "tool_results": {
75
+ "type": "array",
76
+ "items": {
77
+ "type": "object",
78
+ "required": [
79
+ "tool_name",
80
+ "content"
81
+ ],
82
+ "properties": {
83
+ "tool_name": {
84
+ "type": "string"
85
+ },
86
+ "content": {
87
+ "type": "string"
88
+ }
89
+ },
90
+ "additionalProperties": true
91
+ }
92
+ },
93
+ "candidate_final_response": {
94
+ "type": "string"
95
+ },
96
+ "metadata": {
97
+ "type": "object",
98
+ "properties": {
99
+ "scenario_family": {
100
+ "type": [
101
+ "string",
102
+ "null"
103
+ ]
104
+ },
105
+ "requires_transform": {
106
+ "type": [
107
+ "boolean",
108
+ "null"
109
+ ]
110
+ },
111
+ "requires_synthesis": {
112
+ "type": [
113
+ "boolean",
114
+ "null"
115
+ ]
116
+ },
117
+ "requires_all_tool_facts": {
118
+ "type": [
119
+ "boolean",
120
+ "null"
121
+ ]
122
+ },
123
+ "must_acknowledge_missing_data": {
124
+ "type": [
125
+ "boolean",
126
+ "null"
127
+ ]
128
+ }
129
+ },
130
+ "additionalProperties": false
131
+ }
132
+ },
133
+ "additionalProperties": false
134
+ }
hf_model/labels.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label_mode": "production",
3
+ "labels": [
4
+ "valid_final_response",
5
+ "missing_tool_fact",
6
+ "contradicts_tool_result",
7
+ "unsupported_claim",
8
+ "failed_to_acknowledge_data_gap"
9
+ ],
10
+ "label2id": {
11
+ "valid_final_response": 0,
12
+ "missing_tool_fact": 1,
13
+ "contradicts_tool_result": 2,
14
+ "unsupported_claim": 3,
15
+ "failed_to_acknowledge_data_gap": 4
16
+ },
17
+ "id2label": {
18
+ "0": "valid_final_response",
19
+ "1": "missing_tool_fact",
20
+ "2": "contradicts_tool_result",
21
+ "3": "unsupported_claim",
22
+ "4": "failed_to_acknowledge_data_gap"
23
+ }
24
+ }
hf_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e02866eaeff9f760e91414b51dc69371420ae83451c2ba06603d69affeb2b920
3
+ size 567607780
hf_model/onnx_parity_report.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema_version": "final-response-verifier-onnx-parity/v1",
3
+ "rows": 10,
4
+ "pt_fp32_top_label_agreement": 1.0,
5
+ "pt_fp32_max_abs_diff": 2.980232238769531e-07,
6
+ "quantized_present": true,
7
+ "fp32_quantized_top_label_agreement": 1.0,
8
+ "fp32_quantized_disagreements": 0,
9
+ "fp32_quantized_max_abs_diff": 0.017383113503456116
10
+ }
hf_model/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
hf_model/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
hf_model/thresholds.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema_version": "final-response-verifier-thresholds/v1",
3
+ "mode": "shadow",
4
+ "default_action": "allow",
5
+ "labels": {
6
+ "valid_final_response": {
7
+ "action": "allow",
8
+ "advisory_min_confidence": 0.0,
9
+ "enforce_min_confidence": 1.01
10
+ },
11
+ "missing_tool_fact": {
12
+ "action": "advisory_then_enforce_after_eval",
13
+ "advisory_min_confidence": 0.9,
14
+ "enforce_min_confidence": 0.995
15
+ },
16
+ "contradicts_tool_result": {
17
+ "action": "advisory_then_enforce_after_eval",
18
+ "advisory_min_confidence": 0.9,
19
+ "enforce_min_confidence": 0.995
20
+ },
21
+ "unsupported_claim": {
22
+ "action": "advisory_then_enforce_after_eval",
23
+ "advisory_min_confidence": 0.9,
24
+ "enforce_min_confidence": 0.995
25
+ },
26
+ "failed_to_acknowledge_data_gap": {
27
+ "action": "advisory_then_enforce_after_eval",
28
+ "advisory_min_confidence": 0.9,
29
+ "enforce_min_confidence": 0.995
30
+ }
31
+ }
32
+ }
hf_model/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
hf_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf1f8fa2877d9aa33073fe14892601f04518942720c585894ee7d015fa4012e9
3
+ size 5905
hf_model/training_provenance.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema_version": "final-response-verifier-training-provenance/v1",
3
+ "base_model": "microsoft/deberta-v3-small",
4
+ "run_profile": "high_vram_quality",
5
+ "gpu_info": {
6
+ "available": true,
7
+ "name": "NVIDIA RTX PRO 6000 Blackwell Server Edition",
8
+ "capability": [
9
+ 12,
10
+ 0
11
+ ],
12
+ "total_gb": 95.0
13
+ },
14
+ "memory_profile": {
15
+ "max_length": 768,
16
+ "epochs": 5,
17
+ "train_batch_size": 16,
18
+ "eval_batch_size": 32,
19
+ "grad_accum": 4,
20
+ "max_per_label": 5000,
21
+ "force_retrain": false,
22
+ "export_cpu_only": true
23
+ },
24
+ "rows": 90,
25
+ "train_rows": 70,
26
+ "validation_rows": 10,
27
+ "test_rows": 10,
28
+ "label_counts": {
29
+ "contradicts_tool_result": 18,
30
+ "missing_tool_fact": 18,
31
+ "unsupported_claim": 18,
32
+ "valid_final_response": 18,
33
+ "failed_to_acknowledge_data_gap": 18
34
+ },
35
+ "resumed_from_checkpoint": true,
36
+ "train_metrics": {
37
+ "train_runtime": 14.31,
38
+ "train_samples_per_second": 24.458,
39
+ "train_steps_per_second": 0.699,
40
+ "total_flos": 16883336101500.0,
41
+ "train_loss": 1.6138598124186199,
42
+ "epoch": 3.0
43
+ },
44
+ "test_metrics": {
45
+ "eval_loss": 1.6188628673553467,
46
+ "eval_accuracy": 0.2,
47
+ "eval_macro_precision": 0.04,
48
+ "eval_macro_recall": 0.2,
49
+ "eval_macro_f1": 0.06666666666666667,
50
+ "eval_macro_precision_all_labels": 0.04,
51
+ "eval_macro_recall_all_labels": 0.2,
52
+ "eval_macro_f1_all_labels": 0.06666666666666667,
53
+ "eval_runtime": 0.3699,
54
+ "eval_samples_per_second": 27.032,
55
+ "eval_steps_per_second": 2.703,
56
+ "epoch": 3.0
57
+ }
58
+ }