socratic-env / leaderboard.json
Developer-Amar's picture
Update project files
a88eb76
raw
history blame contribute delete
671 Bytes
{
"entries": [
{
"model_name": "Llama 3.1 8B (baseline)",
"factual_recall": 0.71,
"socratic_dialogue": 0.68,
"misconception_trap": 0.58,
"overall": 0.657,
"timestamp": "2026-04-06 17:10 UTC"
},
{
"model_name": "Random agent",
"factual_recall": 0.18,
"socratic_dialogue": 0.22,
"misconception_trap": 0.1,
"overall": 0.167,
"timestamp": "2026-04-06 17:10 UTC"
},
{
"model_name": "Test Model pytest",
"factual_recall": 0.75,
"socratic_dialogue": 0.68,
"misconception_trap": 0.6,
"overall": 0.677,
"timestamp": "2026-04-25 18:36 UTC"
}
]
}