[ { "label": "Output v0", "file": "output_v0.jsonl", "description": "Baseline export generated from the initial evaluation run." }, { "label": "Output v1", "file": "output_v1.jsonl", "description": "Export generated after updating the question generation and validation scripts." }, { "label": "Output v2", "file": "output_v2.jsonl", "description": "Latest export, generated after refining the plan generation." }, { "label": "Outputs from 2.5 Flash", "file": "output_v3_Flash.jsonl", "description": "Outputs generated from the Gemini 2.5 Flash evaluation run." }, { "label": "Sample 500 Questions", "file": "500_questions.jsonl", "description": "500 sample questions generated using Gemini 2.5 Flash." }, { "label": "Questions without FMI", "file": "40_questions_without_FMI.jsonl", "description": "40 sample questions generated when Financial Markets and Investments is not an included class." }, { "label": "40 Questions with FMI", "file": "40_questions_direct.jsonl", "description": "40 sample questions generated when Financial Markets and Investments is an included class." } ]