Spaces:
Sleeping
Sleeping
Commit ·
3816847
1
Parent(s): dc67b35
refactor: reorganize source files into core/, evaluation/, runners/, scenarios/ directories
Browse files
Moved loose root-level Python modules into logical subdirectories:
- core/ — models, client, episode_store
- evaluation/ — grading, agent_brutal_audit
- runners/ — baseline_runner, inference
- scenarios/ — simulation, case_generator, iso_adapter
- core/__init__.py +1 -0
- client.py → core/client.py +0 -0
- episode_store.py → core/episode_store.py +0 -0
- models.py → core/models.py +0 -0
- evaluation/__init__.py +1 -0
- agent_brutal_audit.py → evaluation/agent_brutal_audit.py +4 -4
- grading.py → evaluation/grading.py +4 -4
- runners/__init__.py +1 -0
- baseline_runner.py → runners/baseline_runner.py +7 -7
- inference.py → runners/inference.py +8 -8
- scenarios/__init__.py +1 -0
- case_generator.py → scenarios/case_generator.py +0 -0
- iso_adapter.py → scenarios/iso_adapter.py +0 -0
- simulation.py → scenarios/simulation.py +0 -0
core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Core data models, client, and storage for ChargebackOps."""
|
client.py → core/client.py
RENAMED
|
File without changes
|
episode_store.py → core/episode_store.py
RENAMED
|
File without changes
|
models.py → core/models.py
RENAMED
|
File without changes
|
evaluation/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Grading and audit modules for ChargebackOps."""
|
agent_brutal_audit.py → evaluation/agent_brutal_audit.py
RENAMED
|
@@ -21,11 +21,11 @@ import statistics
|
|
| 21 |
from pathlib import Path
|
| 22 |
from typing import Any
|
| 23 |
|
| 24 |
-
from baseline_runner import _heuristic_pick, _obvious_next_action, candidate_actions
|
| 25 |
-
from grading import grade_episode
|
| 26 |
-
from models import ChargebackOpsAction
|
| 27 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 28 |
-
from simulation import get_task
|
| 29 |
|
| 30 |
DATA_DIR = Path("data")
|
| 31 |
|
|
|
|
| 21 |
from pathlib import Path
|
| 22 |
from typing import Any
|
| 23 |
|
| 24 |
+
from runners.baseline_runner import _heuristic_pick, _obvious_next_action, candidate_actions
|
| 25 |
+
from evaluation.grading import grade_episode
|
| 26 |
+
from core.models import ChargebackOpsAction
|
| 27 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 28 |
+
from scenarios.simulation import get_task
|
| 29 |
|
| 30 |
DATA_DIR = Path("data")
|
| 31 |
|
grading.py → evaluation/grading.py
RENAMED
|
@@ -3,11 +3,11 @@
|
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
try:
|
| 6 |
-
from .models import CaseScoreBreakdown, GraderReport
|
| 7 |
-
from .simulation import CaseProgress, InternalCase, TaskScenario
|
| 8 |
except ImportError: # pragma: no cover
|
| 9 |
-
from models import CaseScoreBreakdown, GraderReport
|
| 10 |
-
from simulation import CaseProgress, InternalCase, TaskScenario
|
| 11 |
|
| 12 |
|
| 13 |
def _ratio(numerator: int, denominator: int) -> float:
|
|
|
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
try:
|
| 6 |
+
from ..core.models import CaseScoreBreakdown, GraderReport
|
| 7 |
+
from ..scenarios.simulation import CaseProgress, InternalCase, TaskScenario
|
| 8 |
except ImportError: # pragma: no cover
|
| 9 |
+
from core.models import CaseScoreBreakdown, GraderReport
|
| 10 |
+
from scenarios.simulation import CaseProgress, InternalCase, TaskScenario
|
| 11 |
|
| 12 |
|
| 13 |
def _ratio(numerator: int, denominator: int) -> float:
|
runners/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Baseline and inference runners for ChargebackOps."""
|
baseline_runner.py → runners/baseline_runner.py
RENAMED
|
@@ -12,15 +12,15 @@ from openai import OpenAI
|
|
| 12 |
from pydantic import BaseModel, Field
|
| 13 |
|
| 14 |
try:
|
| 15 |
-
from .grading import grade_episode
|
| 16 |
-
from .models import BaselineRunResult, BaselineTaskResult, ChargebackOpsAction
|
| 17 |
-
from .server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 18 |
-
from .simulation import list_tasks
|
| 19 |
except ImportError: # pragma: no cover
|
| 20 |
-
from grading import grade_episode
|
| 21 |
-
from models import BaselineRunResult, BaselineTaskResult, ChargebackOpsAction
|
| 22 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 23 |
-
from simulation import list_tasks
|
| 24 |
|
| 25 |
try: # pragma: no cover
|
| 26 |
from dotenv import load_dotenv
|
|
|
|
| 12 |
from pydantic import BaseModel, Field
|
| 13 |
|
| 14 |
try:
|
| 15 |
+
from ..evaluation.grading import grade_episode
|
| 16 |
+
from ..core.models import BaselineRunResult, BaselineTaskResult, ChargebackOpsAction
|
| 17 |
+
from ..server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 18 |
+
from ..scenarios.simulation import list_tasks
|
| 19 |
except ImportError: # pragma: no cover
|
| 20 |
+
from evaluation.grading import grade_episode
|
| 21 |
+
from core.models import BaselineRunResult, BaselineTaskResult, ChargebackOpsAction
|
| 22 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 23 |
+
from scenarios.simulation import list_tasks
|
| 24 |
|
| 25 |
try: # pragma: no cover
|
| 26 |
from dotenv import load_dotenv
|
inference.py → runners/inference.py
RENAMED
|
@@ -25,12 +25,12 @@ try:
|
|
| 25 |
_strict_llm_mode,
|
| 26 |
candidate_actions,
|
| 27 |
)
|
| 28 |
-
from .grading import grade_episode
|
| 29 |
-
from .models import BaselineRunResult, BaselineTaskResult
|
| 30 |
-
from .server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 31 |
-
from .simulation import list_tasks
|
| 32 |
except ImportError: # pragma: no cover
|
| 33 |
-
from baseline_runner import (
|
| 34 |
MAX_PROVIDER_RESPONSE_TOKENS,
|
| 35 |
_chat_completion_with_retry,
|
| 36 |
_heuristic_pick,
|
|
@@ -40,10 +40,10 @@ except ImportError: # pragma: no cover
|
|
| 40 |
_strict_llm_mode,
|
| 41 |
candidate_actions,
|
| 42 |
)
|
| 43 |
-
from grading import grade_episode
|
| 44 |
-
from models import BaselineRunResult, BaselineTaskResult
|
| 45 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 46 |
-
from simulation import list_tasks
|
| 47 |
|
| 48 |
if load_dotenv is not None: # pragma: no cover
|
| 49 |
load_dotenv()
|
|
|
|
| 25 |
_strict_llm_mode,
|
| 26 |
candidate_actions,
|
| 27 |
)
|
| 28 |
+
from ..evaluation.grading import grade_episode
|
| 29 |
+
from ..core.models import BaselineRunResult, BaselineTaskResult
|
| 30 |
+
from ..server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 31 |
+
from ..scenarios.simulation import list_tasks
|
| 32 |
except ImportError: # pragma: no cover
|
| 33 |
+
from runners.baseline_runner import (
|
| 34 |
MAX_PROVIDER_RESPONSE_TOKENS,
|
| 35 |
_chat_completion_with_retry,
|
| 36 |
_heuristic_pick,
|
|
|
|
| 40 |
_strict_llm_mode,
|
| 41 |
candidate_actions,
|
| 42 |
)
|
| 43 |
+
from evaluation.grading import grade_episode
|
| 44 |
+
from core.models import BaselineRunResult, BaselineTaskResult
|
| 45 |
from server.chargeback_ops_environment import ChargebackOpsEnvironment
|
| 46 |
+
from scenarios.simulation import list_tasks
|
| 47 |
|
| 48 |
if load_dotenv is not None: # pragma: no cover
|
| 49 |
load_dotenv()
|
scenarios/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Task scenarios, case generation, and ISO adapters for ChargebackOps."""
|
case_generator.py → scenarios/case_generator.py
RENAMED
|
File without changes
|
iso_adapter.py → scenarios/iso_adapter.py
RENAMED
|
File without changes
|
simulation.py → scenarios/simulation.py
RENAMED
|
File without changes
|