agllm2-dev / test_app_models_results.json
arbabarshad's picture
Upgrade all LLM models to latest versions, add test harness
5bbe904
{
"timestamp": "2026-03-31T12:40:43",
"species_tested": "Stenolophus lecontei",
"tested": 7,
"passed": 7,
"failed": 0,
"skipped": 0,
"results": [
{
"model": "GPT-4",
"status": "PASS",
"response": "- According to Midwest USA guidance, the common name of Stenolophus lecontei is **seedcorn beetle**.",
"latency_seconds": 1.95
},
{
"model": "GPT-4.1 Mini",
"status": "PASS",
"response": "According to Midwest USA guidance, the common name of *Stenolophus lecontei* is the **seedcorn beetle**.",
"latency_seconds": 1.37
},
{
"model": "Llama-4 Maverick",
"status": "PASS",
"response": "The common name of Stenolophus lecontei is the seedcorn beetle.",
"latency_seconds": 2.56
},
{
"model": "Llama-4 Scout",
"status": "PASS",
"response": "The common name of Stenolophus lecontei is the Seedcorn beetle. \n\nAccording to Midwest USA guidance, it is also referred to as the Slender seedcorn beetle, although this name seems to refer to a close",
"latency_seconds": 1.37
},
{
"model": "Gemini-2.5 Pro",
"status": "PASS",
"response": "Based on the provided information for the Midwest USA:\n\n*",
"latency_seconds": 8.34
},
{
"model": "Claude Opus 4.6",
"status": "PASS",
"response": "**Seedcorn beetle.**",
"latency_seconds": 2.77
},
{
"model": "Claude Sonnet 4.6",
"status": "PASS",
"response": "The common name of *Stenolophus lecontei* is the **seedcorn beetle**.",
"latency_seconds": 6.93
}
]
}