Spaces:
Running
Running
# Environment manifest for `financial_task_env`: a space served by a FastAPI
# runtime whose application object is `server.app:app`, listening on port 8000.
spec_version: 1
name: financial_task_env
type: space
runtime: fastapi
app: server.app:app
port: 8000

# Tasks exposed by the environment. Every task listed here allows at most
# 15 steps and declares a programmatic grader; each grader `description`
# summarizes how the 0.0-1.0 score is derived for that task.
# NOTE(review): task ids are not contiguous (task_4, task_6 and task_7 are
# absent) - confirm the gaps are intentional.
tasks:
  # Easy question-answering tasks: the grader extracts numbers from the
  # agent's answer and compares them against reference values.
  - id: task_1
    name: Count Plants in Spreadsheet
    difficulty: easy
    max_steps: 15
    grader:
      type: programmatic
      description: "QA grading — extracts numbers from agent answer, compares against reference (85). Score 0.0–1.0 based on numeric match with 5% tolerance."
  - id: task_2
    name: Retrieve TW EOL Charge
    difficulty: easy
    max_steps: 15
    grader:
      type: programmatic
      description: "QA grading — extracts numbers from agent answer, compares against reference (113291). Score 0.0–1.0 based on numeric match with 5% tolerance."
  - id: task_3
    name: Portfolio Mark-to-Market Change
    difficulty: easy
    max_steps: 15
    grader:
      type: programmatic
      description: "QA grading — extracts numbers from agent answer, compares against reference values ($1,989,600 and 27.9%). Score 0.0–1.0 based on numeric match + keyword overlap."
  # Workbook-modification tasks: the grader compares the agent's output xlsx
  # against a reference workbook (sheet names and cell values).
  - id: task_5
    name: Audit and Correct Formula Errors
    difficulty: medium
    max_steps: 15
    grader:
      type: programmatic
      description: "MODIFY grading — compares agent output xlsx cell-by-cell against reference workbook. 30% sheet-name match + 70% cell-level match (2% numeric tolerance). Score 0.0–1.0."
  - id: task_8
    name: Balance Sheet Validation and Indicators
    difficulty: hard
    max_steps: 15
    grader:
      type: programmatic
      description: "MODIFY grading — compares agent output xlsx cell-by-cell against reference workbook. 30% sheet-name match + 70% cell-level match (2% numeric tolerance). Score 0.0–1.0."