File size: 2,540 Bytes
433f30e
 
 
 
 
38df389
 
 
433f30e
 
 
 
 
 
 
a12d38f
 
 
433f30e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38df389
 
 
 
433f30e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
[project]
name = "interp-arena"
version = "0.1.0"
description = "Interpretability Arena: Red vs Blue — mechanistic adversarial LLM environment (OpenEnv)"
readme = "README.md"
# Unsloth (GRPO) compiles TRL patches at import; Python 3.14+ is not supported yet and can produce
# SyntaxError in e.g. UnslothGRPOTrainer.py. Use 3.10–3.13. The arena server alone is fine on newer
# Pythons, but this cap applies to every install of the package, not only to training.
requires-python = ">=3.10,<3.14"
# SPDX license expression (PEP 639); the `{ text = "..." }` table form is deprecated.
license = "BSD-3-Clause"

dependencies = [
    # OpenEnv
    "openenv-core>=0.2.3",
    "fastapi>=0.104.0",
    "uvicorn>=0.24.0",
    # Mechanistic interpretability (keep in sync with server/requirements.txt; 5.x breaks TL lazy imports)
    "transformer-lens==3.0.0",
    "transformers==4.56.2",
    # ML
    "torch>=2.1.0",
    "accelerate>=0.27.0",
    "datasets>=2.18.0",
    # Logging & config
    "wandb>=0.16.0",
    "omegaconf>=2.3.0",
    "hydra-core>=1.3.0",
    # RL / PEFT
    # 0.26+ no longer eager-imports mergekit in callbacks (avoids mergekit/pydantic); cap avoids surprise API breaks
    "trl>=0.26.0,<0.27",
    "peft>=0.11.0",
    "bitsandbytes>=0.43.0",
    # Utilities
    "rich>=13.7.0",
    "numpy>=1.26.0",
    "tqdm>=4.66.0",
    "requests>=2.31.0",
    "python-dotenv>=1.0.0",
]

[project.optional-dependencies]
# Test and lint tooling.
dev = [
    "httpx>=0.27.0",
    "pytest>=8.0.0",
    "pytest-asyncio>=0.23.0",
    "pytest-cov>=5.0.0",
    "ruff>=0.4.0",
]
# GRPO + Unsloth LoRA (requires CUDA for training; `uv sync --extra gpu`).
# openenv-core (HTTP env server + EnvClient, needed by train_grpo to talk to the arena), trl, peft
# and bitsandbytes are already constrained in [project].dependencies and are always installed
# together with any extra, so they are not repeated here: a second copy of the trl cap could
# silently drift from the first.
gpu = [
    "unsloth",
]

[project.scripts]
# `openenv validate` (multi-mode deployment) requires an entry point named `server`.
server = "server.app:main"
train = "scripts.train:main"
train-grpo = "scripts.train_grpo:main"

[build-system]
# Build with Hatchling; wheel contents are configured under [tool.hatch.build.targets.wheel].
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.metadata]
# Lets Hatch accept direct references (e.g. `pkg @ git+https://...`) in dependency lists.
# NOTE(review): none of the dependency lists visible in this file use one — confirm it is still needed.
allow-direct-references = true

[tool.hatch.build.targets.wheel]
# Only the `interp_arena` package is selected for the wheel.
# NOTE(review): [project.scripts] points at `server.app` and `scripts.*`, which are not listed
# here — confirm those modules are importable outside an editable/source checkout.
packages = ["interp_arena"]

[tool.ruff]
# Matches the lower bound of `requires-python` in [project].
target-version = "py310"
line-length = 100

[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "F",  # Pyflakes
    "I",  # isort (import ordering)
    "UP",  # pyupgrade
]

[tool.ruff.lint.per-file-ignores]
# E402 (module import not at top of file) is expected here: Unsloth must be imported after the
# mergekit check and the inspect patch (see unsloth_inspect).
"scripts/train_grpo.py" = ["E402"]

[tool.pytest.ini_options]
# pytest-asyncio: collect async tests automatically, without per-test markers.
asyncio_mode = "auto"
testpaths = ["tests"]
# Short tracebacks, verbose test names.
addopts = "--tb=short -v"