hf-papers / docs /hf_hub_prompt_ab /prompt_ab_summary.json
evalstate's picture
evalstate HF Staff
sync: promote hf_hub_community prompt v3 + add prompt/coverage harness
bba4fab verified
[
{
"variant": "baseline",
"model": "gpt-oss",
"challenge_cases": 12,
"challenge_avg_score": 10.0,
"coverage_cases": 17,
"coverage_avg_score": 10.0,
"coverage_endpoint_rate": 1.0,
"coverage_method_rate": 1.0,
"total_tool_calls": 93,
"total_tokens": 1461966,
"composite": 1.0,
"paths": {
"challenges_json": "/home/ssmith/hf-hub-cards/docs/hf_hub_prompt_ab/baseline/gpt-oss/challenges.json",
"coverage_json": "/home/ssmith/hf-hub-cards/docs/hf_hub_prompt_ab/baseline/gpt-oss/coverage.json"
}
},
{
"variant": "compact",
"model": "gpt-oss",
"challenge_cases": 12,
"challenge_avg_score": 9.583,
"coverage_cases": 17,
"coverage_avg_score": 9.765,
"coverage_endpoint_rate": 0.9412,
"coverage_method_rate": 1.0,
"total_tool_calls": 58,
"total_tokens": 242906,
"composite": 0.9574,
"paths": {
"challenges_json": "/home/ssmith/hf-hub-cards/docs/hf_hub_prompt_ab/compact/gpt-oss/challenges.json",
"coverage_json": "/home/ssmith/hf-hub-cards/docs/hf_hub_prompt_ab/compact/gpt-oss/coverage.json"
}
}
]