Spaces:
Sleeping
Sleeping
Commit ·
95f11da
1
Parent(s): 3816847
refactor: update all imports for new directory structure
Browse files
Update import paths in server/, tests/, and root __init__.py to
reference modules in their new locations (core/, evaluation/,
runners/, scenarios/).
- __init__.py +2 -2
- server/app.py +12 -12
- server/chargeback_ops_environment.py +8 -8
- tests/test_agent_audit.py +1 -1
- tests/test_api.py +2 -2
- tests/test_env.py +2 -2
- tests/test_grader.py +2 -2
- tests/test_requirements.py +5 -5
__init__.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""ChargebackOps OpenEnv package."""
|
| 2 |
|
| 3 |
-
from .client import ChargebackOpsEnv
|
| 4 |
-
from .models import (
|
| 5 |
BaselineRunResult,
|
| 6 |
ChargebackOpsAction,
|
| 7 |
ChargebackOpsObservation,
|
|
|
|
| 1 |
"""ChargebackOps OpenEnv package."""
|
| 2 |
|
| 3 |
+
from .core.client import ChargebackOpsEnv
|
| 4 |
+
from .core.models import (
|
| 5 |
BaselineRunResult,
|
| 6 |
ChargebackOpsAction,
|
| 7 |
ChargebackOpsObservation,
|
server/app.py
CHANGED
|
@@ -13,30 +13,30 @@ except Exception as exc: # pragma: no cover
|
|
| 13 |
) from exc
|
| 14 |
|
| 15 |
try:
|
| 16 |
-
from ..baseline_runner import run_baseline
|
| 17 |
-
from ..episode_store import get_report
|
| 18 |
-
from ..inference import run_inference
|
| 19 |
-
from ..models import (
|
| 20 |
BaselineRunResult,
|
| 21 |
ChargebackOpsAction,
|
| 22 |
ChargebackOpsObservation,
|
| 23 |
TasksResponse,
|
| 24 |
TaskSummary,
|
| 25 |
)
|
| 26 |
-
from ..simulation import list_tasks
|
| 27 |
from .chargeback_ops_environment import ChargebackOpsEnvironment
|
| 28 |
except ImportError: # pragma: no cover
|
| 29 |
-
from baseline_runner import run_baseline
|
| 30 |
-
from episode_store import get_report
|
| 31 |
-
from inference import run_inference
|
| 32 |
-
from models import (
|
| 33 |
BaselineRunResult,
|
| 34 |
ChargebackOpsAction,
|
| 35 |
ChargebackOpsObservation,
|
| 36 |
TasksResponse,
|
| 37 |
TaskSummary,
|
| 38 |
)
|
| 39 |
-
from simulation import list_tasks
|
| 40 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 41 |
|
| 42 |
|
|
@@ -95,9 +95,9 @@ def generate_tasks(
|
|
| 95 |
"""Generate parametric tasks from a seed for infinite scenario variety."""
|
| 96 |
|
| 97 |
try:
|
| 98 |
-
from case_generator import generate_task_suite
|
| 99 |
except ImportError: # pragma: no cover
|
| 100 |
-
from ..case_generator import generate_task_suite
|
| 101 |
|
| 102 |
suite = generate_task_suite(
|
| 103 |
base_seed=seed, easy_count=easy, medium_count=medium, hard_count=hard,
|
|
|
|
| 13 |
) from exc
|
| 14 |
|
| 15 |
try:
|
| 16 |
+
from ..runners.baseline_runner import run_baseline
|
| 17 |
+
from ..core.episode_store import get_report
|
| 18 |
+
from ..runners.inference import run_inference
|
| 19 |
+
from ..core.models import (
|
| 20 |
BaselineRunResult,
|
| 21 |
ChargebackOpsAction,
|
| 22 |
ChargebackOpsObservation,
|
| 23 |
TasksResponse,
|
| 24 |
TaskSummary,
|
| 25 |
)
|
| 26 |
+
from ..scenarios.simulation import list_tasks
|
| 27 |
from .chargeback_ops_environment import ChargebackOpsEnvironment
|
| 28 |
except ImportError: # pragma: no cover
|
| 29 |
+
from runners.baseline_runner import run_baseline
|
| 30 |
+
from core.episode_store import get_report
|
| 31 |
+
from runners.inference import run_inference
|
| 32 |
+
from core.models import (
|
| 33 |
BaselineRunResult,
|
| 34 |
ChargebackOpsAction,
|
| 35 |
ChargebackOpsObservation,
|
| 36 |
TasksResponse,
|
| 37 |
TaskSummary,
|
| 38 |
)
|
| 39 |
+
from scenarios.simulation import list_tasks
|
| 40 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 41 |
|
| 42 |
|
|
|
|
| 95 |
"""Generate parametric tasks from a seed for infinite scenario variety."""
|
| 96 |
|
| 97 |
try:
|
| 98 |
+
from scenarios.case_generator import generate_task_suite
|
| 99 |
except ImportError: # pragma: no cover
|
| 100 |
+
from ..scenarios.case_generator import generate_task_suite
|
| 101 |
|
| 102 |
suite = generate_task_suite(
|
| 103 |
base_seed=seed, easy_count=easy, medium_count=medium, hard_count=hard,
|
server/chargeback_ops_environment.py
CHANGED
|
@@ -7,9 +7,9 @@ from uuid import uuid4
|
|
| 7 |
from openenv.core.env_server.interfaces import Environment
|
| 8 |
|
| 9 |
try:
|
| 10 |
-
from ..episode_store import record_report
|
| 11 |
-
from ..grading import grade_episode
|
| 12 |
-
from ..models import (
|
| 13 |
ActionTraceItem,
|
| 14 |
CaseQueueItem,
|
| 15 |
CaseResolutionState,
|
|
@@ -20,11 +20,11 @@ try:
|
|
| 20 |
PolicyView,
|
| 21 |
VisibleCase,
|
| 22 |
)
|
| 23 |
-
from ..simulation import ActionRecord, CaseProgress, InternalCase, get_task
|
| 24 |
except ImportError: # pragma: no cover
|
| 25 |
-
from episode_store import record_report
|
| 26 |
-
from grading import grade_episode
|
| 27 |
-
from models import (
|
| 28 |
ActionTraceItem,
|
| 29 |
CaseQueueItem,
|
| 30 |
CaseResolutionState,
|
|
@@ -35,7 +35,7 @@ except ImportError: # pragma: no cover
|
|
| 35 |
PolicyView,
|
| 36 |
VisibleCase,
|
| 37 |
)
|
| 38 |
-
from simulation import ActionRecord, CaseProgress, InternalCase, get_task
|
| 39 |
|
| 40 |
|
| 41 |
class ChargebackOpsEnvironment(
|
|
|
|
| 7 |
from openenv.core.env_server.interfaces import Environment
|
| 8 |
|
| 9 |
try:
|
| 10 |
+
from ..core.episode_store import record_report
|
| 11 |
+
from ..evaluation.grading import grade_episode
|
| 12 |
+
from ..core.models import (
|
| 13 |
ActionTraceItem,
|
| 14 |
CaseQueueItem,
|
| 15 |
CaseResolutionState,
|
|
|
|
| 20 |
PolicyView,
|
| 21 |
VisibleCase,
|
| 22 |
)
|
| 23 |
+
from ..scenarios.simulation import ActionRecord, CaseProgress, InternalCase, get_task
|
| 24 |
except ImportError: # pragma: no cover
|
| 25 |
+
from core.episode_store import record_report
|
| 26 |
+
from evaluation.grading import grade_episode
|
| 27 |
+
from core.models import (
|
| 28 |
ActionTraceItem,
|
| 29 |
CaseQueueItem,
|
| 30 |
CaseResolutionState,
|
|
|
|
| 35 |
PolicyView,
|
| 36 |
VisibleCase,
|
| 37 |
)
|
| 38 |
+
from scenarios.simulation import ActionRecord, CaseProgress, InternalCase, get_task
|
| 39 |
|
| 40 |
|
| 41 |
class ChargebackOpsEnvironment(
|
tests/test_agent_audit.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from agent_brutal_audit import aggregate_results, run_episode
|
| 2 |
|
| 3 |
|
| 4 |
def test_heuristic_beats_bad_on_generated_suite():
|
|
|
|
| 1 |
+
from evaluation.agent_brutal_audit import aggregate_results, run_episode
|
| 2 |
|
| 3 |
|
| 4 |
def test_heuristic_beats_bad_on_generated_suite():
|
tests/test_api.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
from inference import run_inference
|
| 2 |
-
from models import ChargebackOpsAction
|
| 3 |
from server.app import baseline, grader, root, tasks
|
| 4 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 5 |
|
|
|
|
| 1 |
+
from runners.inference import run_inference
|
| 2 |
+
from core.models import ChargebackOpsAction
|
| 3 |
from server.app import baseline, grader, root, tasks
|
| 4 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 5 |
|
tests/test_env.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
from case_generator import generate_task
|
| 2 |
-
from models import ChargebackOpsAction
|
| 3 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 4 |
|
| 5 |
|
|
|
|
| 1 |
+
from scenarios.case_generator import generate_task
|
| 2 |
+
from core.models import ChargebackOpsAction
|
| 3 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 4 |
|
| 5 |
|
tests/test_grader.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
from grading import grade_episode
|
| 2 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 3 |
-
from simulation import get_task
|
| 4 |
|
| 5 |
|
| 6 |
def test_grade_episode_bounds():
|
|
|
|
| 1 |
+
from evaluation.grading import grade_episode
|
| 2 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 3 |
+
from scenarios.simulation import get_task
|
| 4 |
|
| 5 |
|
| 6 |
def test_grade_episode_bounds():
|
tests/test_requirements.py
CHANGED
|
@@ -2,11 +2,11 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
-
from baseline_runner import _heuristic_pick, _obvious_next_action, candidate_actions
|
| 6 |
-
from grading import grade_episode
|
| 7 |
-
from models import ChargebackOpsAction
|
| 8 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 9 |
-
from simulation import get_task, list_tasks
|
| 10 |
|
| 11 |
|
| 12 |
def _run_heuristic_episode(task_id: str) -> tuple[float, float]:
|
|
@@ -162,7 +162,7 @@ def test_problem_statement_live_agent_budget_targets_real_branches():
|
|
| 162 |
|
| 163 |
|
| 164 |
def test_problem_statement_inference_contract_exists():
|
| 165 |
-
content = Path("inference.py").read_text()
|
| 166 |
assert "from openai import OpenAI" in content
|
| 167 |
assert "API_BASE_URL" in content
|
| 168 |
assert "MODEL_NAME" in content
|
|
|
|
| 2 |
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
+
from runners.baseline_runner import _heuristic_pick, _obvious_next_action, candidate_actions
|
| 6 |
+
from evaluation.grading import grade_episode
|
| 7 |
+
from core.models import ChargebackOpsAction
|
| 8 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 9 |
+
from scenarios.simulation import get_task, list_tasks
|
| 10 |
|
| 11 |
|
| 12 |
def _run_heuristic_episode(task_id: str) -> tuple[float, float]:
|
|
|
|
| 162 |
|
| 163 |
|
| 164 |
def test_problem_statement_inference_contract_exists():
|
| 165 |
+
content = Path("runners/inference.py").read_text()
|
| 166 |
assert "from openai import OpenAI" in content
|
| 167 |
assert "API_BASE_URL" in content
|
| 168 |
assert "MODEL_NAME" in content
|