| [ | |
| { | |
| "variant": "baseline", | |
| "model": "gpt-oss", | |
| "challenge_cases": 12, | |
| "challenge_avg_score": 10.0, | |
| "coverage_cases": 17, | |
| "coverage_avg_score": 10.0, | |
| "coverage_endpoint_rate": 1.0, | |
| "coverage_method_rate": 1.0, | |
| "total_tool_calls": 93, | |
| "total_tokens": 1461966, | |
| "composite": 1.0, | |
| "paths": { | |
| "challenges_json": "/home/ssmith/hf-hub-cards/docs/hf_hub_prompt_ab/baseline/gpt-oss/challenges.json", | |
| "coverage_json": "/home/ssmith/hf-hub-cards/docs/hf_hub_prompt_ab/baseline/gpt-oss/coverage.json" | |
| } | |
| }, | |
| { | |
| "variant": "compact", | |
| "model": "gpt-oss", | |
| "challenge_cases": 12, | |
| "challenge_avg_score": 9.583, | |
| "coverage_cases": 17, | |
| "coverage_avg_score": 9.765, | |
| "coverage_endpoint_rate": 0.9412, | |
| "coverage_method_rate": 1.0, | |
| "total_tool_calls": 58, | |
| "total_tokens": 242906, | |
| "composite": 0.9574, | |
| "paths": { | |
| "challenges_json": "/home/ssmith/hf-hub-cards/docs/hf_hub_prompt_ab/compact/gpt-oss/challenges.json", | |
| "coverage_json": "/home/ssmith/hf-hub-cards/docs/hf_hub_prompt_ab/compact/gpt-oss/coverage.json" | |
| } | |
| } | |
| ] |