Spaces:

bpHigh
/

financial-task-env

Running

File size: 11,693 Bytes
"""Task definitions for the Financial Task Environment.

Contains 10 tasks backed by real Excel workbooks covering diverse enterprise
finance & accounting workflows (QA, calculation, validation, data entry,
formatting, modeling, consolidation).  Each task ships a source .xlsx that
the agent must read or modify via Python code execution.
"""

from __future__ import annotations

import os
from pathlib import Path
from typing import Any, Dict, List

# Base directory where xlsx files live (data/<task_id>/)
DATA_DIR = Path(os.environ.get("FINANCIAL_ENV_DATA_DIR", Path(__file__).parent / "data"))

TASKS: Dict[str, Dict[str, Any]] = {}

# ---------------------------------------------------------------------------
# Helper to build source / reference paths
# ---------------------------------------------------------------------------

def _paths(task_id: str, src: str, ref: str | None = None):
    """Return dict with resolved source and optional reference paths."""
    d: Dict[str, Any] = {
        "source_file": str(DATA_DIR / task_id / src),
    }
    if ref:
        d["reference_file"] = str(DATA_DIR / task_id / ref)
    return d

# ── EASY ──────────────────────────────────────────────────────────────────

# Task 1 — QA: count rows (Calculation)
TASKS["task_1"] = {
    "id": "task_1",
    "orig_id": "119",
    "title": "Count Plants in Spreadsheet",
    "difficulty": "easy",
    "task_type": "QA",
    "category": "Calculation",
    "instruction": "How many plants are recorded in the spreadsheet?",
    "constraints": "",
    "reference_answer": "85",
    **_paths("119", "119_src_0.xlsx"),
}

# Task 2 — QA: value retrieval (Cross-sheet Retrieval)
TASKS["task_2"] = {
    "id": "task_2",
    "orig_id": "118",
    "title": "Retrieve TW EOL Charge",
    "difficulty": "easy",
    "task_type": "QA",
    "category": "Cross-sheet/file Retrieval",
    "instruction": "What is the TW EOL charge for 2002? Please provide just the amount.",
    "constraints": "",
    "reference_answer": "113291",
    **_paths("118", "118_src_0.xlsx"),
}

# Task 3 — QA: multi-step calculation (Calculation)
TASKS["task_3"] = {
    "id": "task_3",
    "orig_id": "34",
    "title": "Portfolio Mark-to-Market Change",
    "difficulty": "easy",
    "task_type": "QA",
    "category": "Calculation",
    "instruction": (
        "Assume the following changes occur in the Jul\u2013Dec 2002 market: "
        "Flat curve prices increase uniformly by $2/MWh; Peak 6x16 curve prices "
        "increase uniformly by $5/MWh; monthly contract volumes (Flat and Peak "
        "Total MWh) remain unchanged. Based on the 2002 table, calculate: "
        "(1) the total added value (mark-to-market change) for the combined "
        "Flat + Peak portfolio; and (2) what percentage of this added value "
        "comes from the Peak 6x16 contracts rather than the Flat contracts."
    ),
    "constraints": "",
    "reference_answer": (
        "The total added value of the July\u2013December 2002 portfolio is "
        "$1,989,600 (in absolute terms). Of this amount, approximately 27.9% "
        "(about 28%) comes from the Peak 6x16 contracts, with the remaining "
        "~72.1% coming from the Flat contracts."
    ),
    **_paths("34", "34_src_0.xlsx"),
}

# ── MEDIUM ────────────────────────────────────────────────────────────────

# Task 4 — Modify: summarise imbalances (Calculation + modify)
TASKS["task_4"] = {
    "id": "task_4",
    "orig_id": "35",
    "title": "Summarize Pipeline Imbalances",
    "difficulty": "medium",
    "task_type": "MODIFY",
    "category": "Calculation",
    "instruction": (
        "Summarize the volume and dollar imbalances that exist between the "
        "various pipeline operators (Operators) and Transwestern."
    ),
    "constraints": (
        "You will be given an Excel file as input. Perform all required "
        "operations by modifying the existing workbook. You may add new sheets "
        "if necessary, but you must preserve all original sheets and their "
        "contents. Return the full updated workbook."
    ),
    **_paths("35", "35_src_0.xlsx", "35_ref_0.xlsx"),
}

# Task 5 — Modify: audit & fix formulas (Validation / Review)
TASKS["task_5"] = {
    "id": "task_5",
    "orig_id": "40",
    "title": "Audit and Correct Formula Errors",
    "difficulty": "medium",
    "task_type": "MODIFY",
    "category": "Validation / Review, Calculation",
    "instruction": (
        "Audit the workbook and correct the formula errors in place so numbers "
        "calculate properly."
    ),
    "constraints": (
        "You will be given an Excel file as input. Perform all required "
        "operations by modifying the existing workbook. You may add new sheets "
        "if necessary, but you must preserve all original sheets and their "
        "contents. Return the full updated workbook."
    ),
    **_paths("40", "40_src_0.xlsx", "40_ref_0.xlsx"),
}

# Task 6 — Modify: create table + filter (Structuring / Formatting)
TASKS["task_6"] = {
    "id": "task_6",
    "orig_id": "60",
    "title": "Create Table and Apply Filter",
    "difficulty": "medium",
    "task_type": "MODIFY",
    "category": "Structuring / Formatting",
    "instruction": (
        "On the All Natural Gas sheet, create an Excel table and filter to "
        "show only the COUNTERPARTY entries highlighted in red."
    ),
    "constraints": (
        "You will be given an Excel file as input. Perform all required "
        "operations by modifying the existing workbook. You may add new sheets "
        "if necessary, but you must preserve all original sheets and their "
        "contents. Return the full updated workbook."
    ),
    **_paths("60", "60_src_0.xlsx", "60_ref_0.xlsx"),
}

# Task 7 — Modify: data entry + formatting (Data Entry / Import)
TASKS["task_7"] = {
    "id": "task_7",
    "orig_id": "21",
    "title": "Add Weekday Row and Data Entry",
    "difficulty": "medium",
    "task_type": "MODIFY",
    "category": "Data Entry / Import, Structuring / Formatting",
    "instruction": (
        "Add a weekday line directly below the date headers and update the "
        "12/31/2001 (Mon) column. For that day, there are no \"Receipts\"; "
        "record disbursements of $1,980,800 to Calpine (Power Purchases) and "
        "$100,000 to an unspecified vendor (Gas Purchases). Under Enron Facility "
        "Services, enter $3,101,855 for \"$2.5 per day\" and -$2,081,386 for "
        "\"estimate receipt\"; in Personnel, EES is $584,500; leave all other "
        "items as \"-\"."
    ),
    "constraints": (
        "You will be given an Excel file as input. Perform all required "
        "operations by modifying the existing workbook. You may add new sheets "
        "if necessary, but you must preserve all original sheets and their "
        "contents. Return the full updated workbook."
    ),
    **_paths("21", "21_src_0.xlsx", "21_ref_0.xlsx"),
}

# ── HARD ──────────────────────────────────────────────────────────────────

# Task 8 — Modify: balance-sheet validation + indicator calcs
TASKS["task_8"] = {
    "id": "task_8",
    "orig_id": "0",
    "title": "Balance Sheet Validation and Indicators",
    "difficulty": "hard",
    "task_type": "MODIFY",
    "category": "Validation / Review, Calculation, Structuring / Formatting",
    "instruction": (
        "Complete the validation and indicator calculations as follows: on the "
        "Balance Sheet, add a control to ensure TOTAL ASSETS equals TOTAL "
        "LIABILITIES AND EQUITY; on the Income Statement (Revenue & Expenses), "
        "add an Equity Roll Forward Test to reconcile equity movement and "
        "highlight any differences."
    ),
    "constraints": (
        "You will be given an Excel file as input. Perform all required "
        "operations by modifying the existing workbook. You may add new sheets "
        "if necessary, but you must preserve all original sheets and their "
        "contents. Return the full updated workbook."
    ),
    **_paths("0", "0_src_0.xlsx", "0_ref_0.xlsx"),
}

# Task 9 — Modify: add new sheet mirroring structure (Financial Modeling)
TASKS["task_9"] = {
    "id": "task_9",
    "orig_id": "24",
    "title": "Create Scenario3 Worksheet",
    "difficulty": "hard",
    "task_type": "MODIFY",
    "category": "Structuring / Formatting, Financial Modeling",
    "instruction": (
        'Add a new worksheet named "Scenario3" to the same workbook, mirroring '
        "the structure, row/column layout, monthly detail table, and chart area "
        'of "Scenario1". For Scenario3, update the hedging assumptions to a '
        "balanced allocation: 10-Yr 25%, 5-Yr 20%, 1-Yr 15%, May-Sep 20%, "
        "Q3 15%. Keep the note \"Maximum Monthly Average Short Position to "
        'Cover (July Peak) = 30,508 MW" unchanged; only the new sheet should '
        "be added, and formulas may be used within it."
    ),
    "constraints": (
        "You will be given an Excel file as input. Perform all required "
        "operations by modifying the existing workbook. You may add new sheets "
        "if necessary, but you must preserve all original sheets and their "
        "contents. Return the full updated workbook."
    ),
    **_paths("24", "24_src_0.xlsx", "24_ref_0.xlsx"),
}

# Task 10 — Modify: cross-sheet consolidation (multi-type)
TASKS["task_10"] = {
    "id": "task_10",
    "orig_id": "67",
    "title": "Consolidate by Type and Area",
    "difficulty": "hard",
    "task_type": "MODIFY",
    "category": "Structuring / Formatting, Calculation, Validation / Review, Cross-sheet Retrieval",
    "instruction": (
        "Create a new 'by type_area' worksheet based on the Summary and the "
        "other tabs. It should present two separate tables summarized by "
        "Imbal Type; within each table, consolidate by area, include Volume, "
        "Value and Date, and calculate totals. Finally, confirm that the value "
        "and volume totals tie to the totals shown on the Summary."
    ),
    "constraints": (
        "You will be given an Excel file as input. Perform all required "
        "operations by modifying the existing workbook. You may add new sheets "
        "if necessary, but you must preserve all original sheets and their "
        "contents. Return the full updated workbook."
    ),
    **_paths("67", "67_src_0.xlsx", "67_ref_0.xlsx"),
}

# ---------------------------------------------------------------------------
# Helper accessors
# ---------------------------------------------------------------------------

TASK_IDS: List[str] = sorted(TASKS.keys(), key=lambda x: int(x.split("_")[1]))


def get_task(task_id: str) -> Dict[str, Any]:
    """Return a task dict by ID or raise KeyError."""
    if task_id not in TASKS:
        raise KeyError(
            f"Unknown task_id '{task_id}'. Available: {', '.join(TASK_IDS)}"
        )
    return TASKS[task_id]


def list_tasks() -> List[Dict[str, str]]:
    """Return a summary list of all tasks."""
    return [
        {
            "id": t["id"],
            "title": t["title"],
            "difficulty": t["difficulty"],
            "task_type": t["task_type"],
            "category": t["category"],
        }
        for t in (TASKS[tid] for tid in TASK_IDS)
    ]