Text Classification
Transformers
ONNX
Safetensors
Rust
English
tool-use
tool-calling
guardrails
final-response-verifier
workflow-verification
shadow-mode
Eval Results (legacy)
Instructions to use cowWhySo/final-response-verifier-classifier-production with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cowWhySo/final-response-verifier-classifier-production with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="cowWhySo/final-response-verifier-classifier-production")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("cowWhySo/final-response-verifier-classifier-production", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
Add final-response verifier checkpoint
Browse files
hf_model/artifact_manifest.json
CHANGED
|
@@ -15,7 +15,9 @@
|
|
| 15 |
"eval_batch_size": 32,
|
| 16 |
"grad_accum": 4,
|
| 17 |
"max_per_label": 5000,
|
| 18 |
-
"
|
|
|
|
|
|
|
| 19 |
"export_cpu_only": true
|
| 20 |
},
|
| 21 |
"gpu_info": {
|
|
@@ -49,5 +51,5 @@
|
|
| 49 |
],
|
| 50 |
"deployment_default": "shadow",
|
| 51 |
"shadow_first_reason": "experimental final-response verifier; promote only after eval replay",
|
| 52 |
-
"created_unix":
|
| 53 |
}
|
|
|
|
| 15 |
"eval_batch_size": 32,
|
| 16 |
"grad_accum": 4,
|
| 17 |
"max_per_label": 5000,
|
| 18 |
+
"balance_labels": true,
|
| 19 |
+
"min_per_label": 64,
|
| 20 |
+
"force_retrain": true,
|
| 21 |
"export_cpu_only": true
|
| 22 |
},
|
| 23 |
"gpu_info": {
|
|
|
|
| 51 |
],
|
| 52 |
"deployment_default": "shadow",
|
| 53 |
"shadow_first_reason": "experimental final-response verifier; promote only after eval replay",
|
| 54 |
+
"created_unix": 1780181972
|
| 55 |
}
|
hf_model/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 567607780
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb0398c7150936e081bbe64568c55846b57eaa9af6d3a31551d0c878ca33d171
|
| 3 |
size 567607780
|
hf_model/onnx_parity_report.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"schema_version": "final-response-verifier-onnx-parity/v1",
|
| 3 |
-
"rows":
|
| 4 |
"pt_fp32_top_label_agreement": 1.0,
|
| 5 |
-
"pt_fp32_max_abs_diff":
|
| 6 |
"quantized_present": true,
|
| 7 |
"fp32_quantized_top_label_agreement": 1.0,
|
| 8 |
"fp32_quantized_disagreements": 0,
|
| 9 |
-
"fp32_quantized_max_abs_diff": 0.
|
| 10 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"schema_version": "final-response-verifier-onnx-parity/v1",
|
| 3 |
+
"rows": 33,
|
| 4 |
"pt_fp32_top_label_agreement": 1.0,
|
| 5 |
+
"pt_fp32_max_abs_diff": 4.470348358154297e-07,
|
| 6 |
"quantized_present": true,
|
| 7 |
"fp32_quantized_top_label_agreement": 1.0,
|
| 8 |
"fp32_quantized_disagreements": 0,
|
| 9 |
+
"fp32_quantized_max_abs_diff": 0.03157895430922508
|
| 10 |
}
|
hf_model/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:906f6075ba75e9681408fd0cfbb76d1dbc4c174c8391db6fb133a8ba8ba64706
|
| 3 |
size 5905
|
hf_model/training_provenance.json
CHANGED
|
@@ -18,41 +18,43 @@
|
|
| 18 |
"eval_batch_size": 32,
|
| 19 |
"grad_accum": 4,
|
| 20 |
"max_per_label": 5000,
|
| 21 |
-
"
|
|
|
|
|
|
|
| 22 |
"export_cpu_only": true
|
| 23 |
},
|
| 24 |
-
"rows":
|
| 25 |
-
"train_rows":
|
| 26 |
-
"validation_rows":
|
| 27 |
-
"test_rows":
|
| 28 |
"label_counts": {
|
| 29 |
-
"valid_final_response":
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
-
"missing_tool_fact":
|
| 33 |
-
"
|
| 34 |
},
|
| 35 |
"resumed_from_checkpoint": false,
|
| 36 |
"train_metrics": {
|
| 37 |
-
"train_runtime":
|
| 38 |
-
"train_samples_per_second":
|
| 39 |
-
"train_steps_per_second":
|
| 40 |
-
"total_flos":
|
| 41 |
-
"train_loss": 1.
|
| 42 |
"epoch": 3.0
|
| 43 |
},
|
| 44 |
"test_metrics": {
|
| 45 |
-
"eval_loss": 1.
|
| 46 |
-
"eval_accuracy": 0.
|
| 47 |
-
"eval_macro_precision": 0.
|
| 48 |
"eval_macro_recall": 0.2,
|
| 49 |
-
"eval_macro_f1": 0.
|
| 50 |
-
"eval_macro_precision_all_labels": 0.
|
| 51 |
"eval_macro_recall_all_labels": 0.2,
|
| 52 |
-
"eval_macro_f1_all_labels": 0.
|
| 53 |
-
"eval_runtime": 0.
|
| 54 |
-
"eval_samples_per_second":
|
| 55 |
-
"eval_steps_per_second":
|
| 56 |
"epoch": 3.0
|
| 57 |
}
|
| 58 |
}
|
|
|
|
| 18 |
"eval_batch_size": 32,
|
| 19 |
"grad_accum": 4,
|
| 20 |
"max_per_label": 5000,
|
| 21 |
+
"balance_labels": true,
|
| 22 |
+
"min_per_label": 64,
|
| 23 |
+
"force_retrain": true,
|
| 24 |
"export_cpu_only": true
|
| 25 |
},
|
| 26 |
+
"rows": 378,
|
| 27 |
+
"train_rows": 312,
|
| 28 |
+
"validation_rows": 33,
|
| 29 |
+
"test_rows": 33,
|
| 30 |
"label_counts": {
|
| 31 |
+
"valid_final_response": 122,
|
| 32 |
+
"unsupported_claim": 64,
|
| 33 |
+
"failed_to_acknowledge_data_gap": 64,
|
| 34 |
+
"missing_tool_fact": 64,
|
| 35 |
+
"contradicts_tool_result": 64
|
| 36 |
},
|
| 37 |
"resumed_from_checkpoint": false,
|
| 38 |
"train_metrics": {
|
| 39 |
+
"train_runtime": 11.0074,
|
| 40 |
+
"train_samples_per_second": 141.723,
|
| 41 |
+
"train_steps_per_second": 2.271,
|
| 42 |
+
"total_flos": 118144540443600.0,
|
| 43 |
+
"train_loss": 1.6387429555257162,
|
| 44 |
"epoch": 3.0
|
| 45 |
},
|
| 46 |
"test_metrics": {
|
| 47 |
+
"eval_loss": 1.729296088218689,
|
| 48 |
+
"eval_accuracy": 0.09090909090909091,
|
| 49 |
+
"eval_macro_precision": 0.01818181818181818,
|
| 50 |
"eval_macro_recall": 0.2,
|
| 51 |
+
"eval_macro_f1": 0.03333333333333333,
|
| 52 |
+
"eval_macro_precision_all_labels": 0.01818181818181818,
|
| 53 |
"eval_macro_recall_all_labels": 0.2,
|
| 54 |
+
"eval_macro_f1_all_labels": 0.03333333333333333,
|
| 55 |
+
"eval_runtime": 0.5187,
|
| 56 |
+
"eval_samples_per_second": 63.619,
|
| 57 |
+
"eval_steps_per_second": 3.856,
|
| 58 |
"epoch": 3.0
|
| 59 |
}
|
| 60 |
}
|