siege / openenv.yaml
BART-ender's picture
Upload folder using huggingface_hub
a12d38f verified
raw
history blame contribute delete
640 Bytes
# Manifest metadata for the environment.
name: interp-arena
version: 0.1.0
# Folded block scalar (`>`): the indented lines below are joined with single
# spaces into one paragraph that ends with a single newline. The body MUST be
# indented deeper than the `description` key, otherwise the scalar is empty
# and the first body line is parsed as a separate mapping key.
description: >
  Interpretability Arena: Red vs Blue — a multi-agent adversarial environment
  where agents learn to manipulate and defend LLM internal states using
  mechanistic interpretability techniques (TransformerLens).
# Explicit empty string; a bare `author:` would parse as null instead.
author: ""
license: BSD-3-Clause
# Server entry point
# The child keys must be indented under `server`; at column 0 they would become
# top-level keys and `server` itself would be null.
server:
  module: server.app
  # 0.0.0.0 is the IPv4 wildcard address.
  # NOTE(review): presumably the consumer binds the server to all interfaces;
  # confirm against the tool that reads this manifest.
  host: 0.0.0.0
  port: 8000
# Path to the README file.
# NOTE(review): kept as a top-level key, as it appears in the source; confirm
# it is not meant to be nested under another mapping.
README: 'interp_arena/README.md'
# Type references for the environment's action, observation, and state.
# NOTE(review): values look like `<module>.<ClassName>` paths; confirm how the
# consumer resolves them.
action_type: 'models.InterpArenaAction'
observation_type: 'models.InterpArenaObservation'
state_type: 'models.InterpArenaState'
# Container
# The child keys must be indented under `docker`; at column 0 they would become
# top-level keys and `docker` itself would be null.
docker:
  dockerfile: server/Dockerfile
  # The colon here is not followed by a space, so the value stays a single
  # plain string ("interp-arena:latest") rather than a nested mapping.
  image: interp-arena:latest