leaderboard / data /processed_config.json
tareknaser's picture
chore: update data with agentic results
a83c01d unverified
{
"experiments": [
{
"name": "Bash Only + Claude Sonnet 4.6",
"data_file": "bash_only_+_claude_sonnet_4.6_vanilla.json",
"tag": "vanilla",
"model_id": "anthropic/claude-sonnet-4-6"
},
{
"name": "Bash Only + Claude Sonnet 4.6",
"data_file": "bash_only_+_claude_sonnet_4.6_benign.json",
"tag": "benign",
"model_id": "anthropic/claude-sonnet-4-6"
},
{
"name": "Bash Only + Claude Sonnet 4.6",
"data_file": "bash_only_+_claude_sonnet_4.6_malignant.json",
"tag": "malignant",
"model_id": "anthropic/claude-sonnet-4-6"
},
{
"name": "Bash Only + Claude Sonnet 4.6",
"data_file": "bash_only_+_claude_sonnet_4.6_rust-specific.json",
"tag": "rust-specific",
"model_id": "anthropic/claude-sonnet-4-6"
},
{
"name": "Bash Only + GPT 5.4",
"data_file": "bash_only_+_gpt_5.4_vanilla.json",
"tag": "vanilla",
"model_id": "openai/gpt-5.4"
},
{
"name": "Bash Only + GPT 5.4",
"data_file": "bash_only_+_gpt_5.4_benign.json",
"tag": "benign",
"model_id": "openai/gpt-5.4"
},
{
"name": "Bash Only + GPT 5.4",
"data_file": "bash_only_+_gpt_5.4_malignant.json",
"tag": "malignant",
"model_id": "openai/gpt-5.4"
},
{
"name": "Bash Only + GPT 5.4",
"data_file": "bash_only_+_gpt_5.4_rust-specific.json",
"tag": "rust-specific",
"model_id": "openai/gpt-5.4"
},
{
"name": "Bash Only + Gemini 3.1 Pro",
"data_file": "bash_only_+_gemini_3.1_pro_vanilla.json",
"tag": "vanilla",
"model_id": "google/gemini-3.1-pro-preview"
},
{
"name": "Bash Only + Gemini 3.1 Pro",
"data_file": "bash_only_+_gemini_3.1_pro_benign.json",
"tag": "benign",
"model_id": "google/gemini-3.1-pro-preview"
},
{
"name": "Bash Only + Gemini 3.1 Pro",
"data_file": "bash_only_+_gemini_3.1_pro_malignant.json",
"tag": "malignant",
"model_id": "google/gemini-3.1-pro-preview"
},
{
"name": "Bash Only + Gemini 3.1 Pro",
"data_file": "bash_only_+_gemini_3.1_pro_rust-specific.json",
"tag": "rust-specific",
"model_id": "google/gemini-3.1-pro-preview"
},
{
"name": "Bash Only + Qwen 3.6 Plus",
"data_file": "bash_only_+_qwen_3.6_plus_vanilla.json",
"tag": "vanilla",
"model_id": "openrouter/qwen/qwen3.6-plus"
},
{
"name": "Bash Only + Qwen 3.6 Plus",
"data_file": "bash_only_+_qwen_3.6_plus_benign.json",
"tag": "benign",
"model_id": "openrouter/qwen/qwen3.6-plus"
},
{
"name": "Bash Only + Qwen 3.6 Plus",
"data_file": "bash_only_+_qwen_3.6_plus_malignant.json",
"tag": "malignant",
"model_id": "openrouter/qwen/qwen3.6-plus"
},
{
"name": "Bash Only + Qwen 3.6 Plus",
"data_file": "bash_only_+_qwen_3.6_plus_rust-specific.json",
"tag": "rust-specific",
"model_id": "openrouter/qwen/qwen3.6-plus"
}
]
}