File size: 640 Bytes
433f30e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a12d38f
433f30e
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Project metadata: identifying name, version string, human-readable
# description, author and license.
name: interp-arena
version: 0.1.0
# Folded block scalar (`>`): the indented lines below are joined with single
# spaces into one string, and one trailing newline is kept.
description: >
  Interpretability Arena: Red vs Blue — a multi-agent adversarial environment
  where agents learn to manipulate and defend LLM internal states using
  mechanistic interpretability techniques (TransformerLens).
# NOTE(review): author is an explicit empty string — presumably a placeholder;
# fill in or confirm it is intentionally blank.
author: ""
license: BSD-3-Clause

# Server entry point: the module to load plus the host/port pair for the
# server.
server:
  # Looks like a dotted module path — TODO confirm the form the consumer
  # expects.
  module: server.app
  # NOTE(review): 0.0.0.0 conventionally means "all IPv4 interfaces" when used
  # as a bind address — confirm that exposure is intended.
  host: 0.0.0.0
  port: 8000

# Path to the project README.
# NOTE(review): this key is upper-case while every other key in this file is
# lower-case — confirm the consumer expects exactly `README`.
README: interp_arena/README.md
# Action / Observation / State types.
# Values appear to be `module.ClassName` references — TODO confirm how the
# consumer resolves them.
action_type: models.InterpArenaAction
observation_type: models.InterpArenaObservation
state_type: models.InterpArenaState

# Container: Dockerfile location and the image reference for the container.
docker:
  dockerfile: server/Dockerfile
  # The value contains a colon with no following space, so it stays a plain
  # string (`name:tag` form) rather than being parsed as a nested mapping.
  image: interp-arena:latest