Spaces:
Build error
Build error
Kaushik Rajan
committed on
Commit
·
e526e6a
0
Parent(s):
Phase 1: Initial SPIRAL project setup
Browse files — Complete structure with Gradio interface, config, and documentation
- .gitignore +227 -0
- README.md +101 -0
- app/app.py +255 -0
- config.yaml +124 -0
- data/README.md +45 -0
- execution-plan.md +54 -0
- requirements.txt +44 -0
- src/__init__.py +15 -0
- src/games/__init__.py +12 -0
- src/models/__init__.py +13 -0
- src/reasoning/__init__.py +13 -0
- src/training/__init__.py +13 -0
- tests/test_basic.py +130 -0
.gitignore
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# poetry
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 102 |
+
#poetry.lock
|
| 103 |
+
|
| 104 |
+
# pdm
|
| 105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 106 |
+
#pdm.lock
|
| 107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 108 |
+
# in version control.
|
| 109 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 110 |
+
.pdm.toml
|
| 111 |
+
|
| 112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 113 |
+
__pypackages__/
|
| 114 |
+
|
| 115 |
+
# Celery stuff
|
| 116 |
+
celerybeat-schedule
|
| 117 |
+
celerybeat.pid
|
| 118 |
+
|
| 119 |
+
# SageMath parsed files
|
| 120 |
+
*.sage.py
|
| 121 |
+
|
| 122 |
+
# Environments
|
| 123 |
+
.env
|
| 124 |
+
.venv
|
| 125 |
+
env/
|
| 126 |
+
venv/
|
| 127 |
+
ENV/
|
| 128 |
+
env.bak/
|
| 129 |
+
venv.bak/
|
| 130 |
+
|
| 131 |
+
# Spyder project settings
|
| 132 |
+
.spyderproject
|
| 133 |
+
.spyproject
|
| 134 |
+
|
| 135 |
+
# Rope project settings
|
| 136 |
+
.ropeproject
|
| 137 |
+
|
| 138 |
+
# mkdocs documentation
|
| 139 |
+
/site
|
| 140 |
+
|
| 141 |
+
# mypy
|
| 142 |
+
.mypy_cache/
|
| 143 |
+
.dmypy.json
|
| 144 |
+
dmypy.json
|
| 145 |
+
|
| 146 |
+
# Pyre type checker
|
| 147 |
+
.pyre/
|
| 148 |
+
|
| 149 |
+
# pytype static type analyzer
|
| 150 |
+
.pytype/
|
| 151 |
+
|
| 152 |
+
# Cython debug symbols
|
| 153 |
+
cython_debug/
|
| 154 |
+
|
| 155 |
+
# PyCharm
|
| 156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 157 |
+
# be added to the global gitignore or merged into this project gitignore. For a PyCharm
|
| 158 |
+
# project, it is recommended to include the template in the project gitignore.
|
| 159 |
+
# https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 160 |
+
# and can be added to the global gitignore or merged into this project gitignore. For a PyCharm
|
| 161 |
+
# project, it is recommended to include the template in the project gitignore.
|
| 162 |
+
# https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 163 |
+
.idea/
|
| 164 |
+
|
| 165 |
+
# VS Code
|
| 166 |
+
.vscode/
|
| 167 |
+
|
| 168 |
+
# macOS
|
| 169 |
+
.DS_Store
|
| 170 |
+
.AppleDouble
|
| 171 |
+
.LSOverride
|
| 172 |
+
|
| 173 |
+
# Windows
|
| 174 |
+
Thumbs.db
|
| 175 |
+
Thumbs.db:encryptable
|
| 176 |
+
ehthumbs.db
|
| 177 |
+
ehthumbs_vista.db
|
| 178 |
+
*.stackdump
|
| 179 |
+
[Dd]esktop.ini
|
| 180 |
+
$RECYCLE.BIN/
|
| 181 |
+
*.cab
|
| 182 |
+
*.msi
|
| 183 |
+
*.msix
|
| 184 |
+
*.msm
|
| 185 |
+
*.msp
|
| 186 |
+
*.lnk
|
| 187 |
+
|
| 188 |
+
# Model files and large data
|
| 189 |
+
*.bin
|
| 190 |
+
*.safetensors
|
| 191 |
+
*.pt
|
| 192 |
+
*.pth
|
| 193 |
+
*.ckpt
|
| 194 |
+
*.h5
|
| 195 |
+
*.pkl
|
| 196 |
+
*.pickle
|
| 197 |
+
models/*/
|
| 198 |
+
|
| 199 |
+
# Logs and experiments
|
| 200 |
+
logs/
|
| 201 |
+
wandb/
|
| 202 |
+
tensorboard/
|
| 203 |
+
*.log
|
| 204 |
+
|
| 205 |
+
# Temporary files
|
| 206 |
+
tmp/
|
| 207 |
+
temp/
|
| 208 |
+
*.tmp
|
| 209 |
+
*.temp
|
| 210 |
+
|
| 211 |
+
# Data files
|
| 212 |
+
data/*/
|
| 213 |
+
!data/README.md
|
| 214 |
+
|
| 215 |
+
# Hugging Face cache
|
| 216 |
+
.cache/
|
| 217 |
+
transformers_cache/
|
| 218 |
+
|
| 219 |
+
# Local environment variables
|
| 220 |
+
.env.local
|
| 221 |
+
.env.development.local
|
| 222 |
+
.env.test.local
|
| 223 |
+
.env.production.local
|
| 224 |
+
|
| 225 |
+
# Gradio temporary files
|
| 226 |
+
flagged/
|
| 227 |
+
gradio_cached_examples/
|
README.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPIRAL: Interactive Reasoning Game Simulator
|
| 2 |
+
|
| 3 |
+
A practical, interactive tool based on the SPIRAL paper ("Self-Play on Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning") deployed on Hugging Face Spaces.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
This tool demonstrates how self-play training on zero-sum games can improve AI reasoning capabilities. Users can:
|
| 8 |
+
|
| 9 |
+
- **Play Games**: Engage with AI in games like Kuhn Poker and TicTacToe
|
| 10 |
+
- **View Reasoning**: See step-by-step AI reasoning traces during gameplay
|
| 11 |
+
- **Test Transfer**: Evaluate AI's reasoning skills on math problems and logic puzzles
|
| 12 |
+
- **Learn**: Understand AI decision-making through interactive visualizations
|
| 13 |
+
|
| 14 |
+
## Features
|
| 15 |
+
|
| 16 |
+
### For Non-Technical Users
|
| 17 |
+
- Simple web interface for playing games
|
| 18 |
+
- Visual reasoning explanations
|
| 19 |
+
- Educational tutorials about AI thinking
|
| 20 |
+
- No setup required - runs in browser
|
| 21 |
+
|
| 22 |
+
### For Technical Users
|
| 23 |
+
- Access to model weights and training scripts
|
| 24 |
+
- API endpoints for extending the system
|
| 25 |
+
- Custom game integration capabilities
|
| 26 |
+
- Fine-tuning examples and documentation
|
| 27 |
+
|
| 28 |
+
## Project Structure
|
| 29 |
+
|
| 30 |
+
```
|
| 31 |
+
SPIRAL/
|
| 32 |
+
├── src/ # Core implementation
|
| 33 |
+
│ ├── games/ # Game environments
|
| 34 |
+
│ ├── models/ # SPIRAL model implementation
|
| 35 |
+
│ ├── training/ # Self-play training logic
|
| 36 |
+
│ └── reasoning/ # Reasoning trace generation
|
| 37 |
+
├── models/ # Trained model weights
|
| 38 |
+
├── data/ # Game datasets and benchmarks
|
| 39 |
+
├── app/ # Gradio web interface
|
| 40 |
+
├── tests/ # Unit and integration tests
|
| 41 |
+
└── docs/ # Documentation and tutorials
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
## Technology Stack
|
| 45 |
+
|
| 46 |
+
- **Backend**: Python 3.8+
|
| 47 |
+
- **ML Framework**: PyTorch, Transformers
|
| 48 |
+
- **RL Library**: Gymnasium, Stable Baselines3
|
| 49 |
+
- **Web Interface**: Gradio
|
| 50 |
+
- **Base Model**: Qwen-4B from Hugging Face
|
| 51 |
+
- **Deployment**: Hugging Face Spaces
|
| 52 |
+
|
| 53 |
+
## Development Phases
|
| 54 |
+
|
| 55 |
+
1. **Research and Planning** ✅
|
| 56 |
+
2. **Implementation** 🔄
|
| 57 |
+
3. **Testing and Optimization** 📋
|
| 58 |
+
4. **Deployment and Documentation** 📋
|
| 59 |
+
5. **Maintenance and Iteration** 📋
|
| 60 |
+
|
| 61 |
+
## Getting Started
|
| 62 |
+
|
| 63 |
+
### Prerequisites
|
| 64 |
+
- Python 3.8+
|
| 65 |
+
- PyTorch
|
| 66 |
+
- Hugging Face account (for model access)
|
| 67 |
+
|
| 68 |
+
### Installation
|
| 69 |
+
```bash
|
| 70 |
+
pip install -r requirements.txt
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
### Quick Start
|
| 74 |
+
```bash
|
| 75 |
+
python app/app.py
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
## Citation
|
| 79 |
+
|
| 80 |
+
If you use this tool in your research, please cite the original SPIRAL paper:
|
| 81 |
+
|
| 82 |
+
```bibtex
|
| 83 |
+
@article{spiral2024,
|
| 84 |
+
title={Self-Play on Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning},
|
| 85 |
+
author={[Authors]},
|
| 86 |
+
journal={[Journal]},
|
| 87 |
+
year={2024}
|
| 88 |
+
}
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
## License
|
| 92 |
+
|
| 93 |
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
| 94 |
+
|
| 95 |
+
## Contributing
|
| 96 |
+
|
| 97 |
+
We welcome contributions! Please see CONTRIBUTING.md for guidelines.
|
| 98 |
+
|
| 99 |
+
## Support
|
| 100 |
+
|
| 101 |
+
For issues and questions, please open a GitHub issue or contact us via Hugging Face Spaces.
|
app/app.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SPIRAL Interactive Reasoning Game Simulator - Main Gradio App
|
| 3 |
+
|
| 4 |
+
A practical tool demonstrating how self-play training on zero-sum games
|
| 5 |
+
can improve AI reasoning capabilities.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import gradio as gr
|
| 9 |
+
import yaml
|
| 10 |
+
import os
|
| 11 |
+
import sys
|
| 12 |
+
|
| 13 |
+
# Add the src directory to the path for imports
|
| 14 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 15 |
+
|
| 16 |
+
from typing import Tuple, Dict, Any, List, Optional
|
| 17 |
+
import logging
|
| 18 |
+
|
| 19 |
+
# Configure logging
|
| 20 |
+
logging.basicConfig(level=logging.INFO)
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
class SpiralApp:
    """Main application class for the SPIRAL reasoning simulator.

    The class loads a YAML configuration, configures logging, and builds a
    Gradio ``Blocks`` UI with three tabs (game play, reasoning test, about).
    The game and reasoning handlers are placeholders that echo their input;
    the real implementations are planned for Phase 2.
    """

    def __init__(self, config_path: str = "../config.yaml"):
        """Initialize the SPIRAL app with configuration.

        Args:
            config_path: Path to the YAML configuration file. A relative path
                is tried as given first and, if it does not exist, relative
                to the directory containing this module.
        """
        self.config = self._load_config(config_path)
        self.setup_logging()

        # Initialize components (will be implemented in Phase 2)
        self.game_interface = None
        self.reasoning_interface = None
        self.transfer_interface = None

        logger.info("SPIRAL App initialized successfully")

    @staticmethod
    def _resolve_config_path(config_path: str) -> str:
        """Return the path that should actually be opened for *config_path*.

        The original path wins whenever it is absolute or exists relative to
        the current working directory, so existing callers are unaffected.
        Otherwise the path is retried relative to this file's directory,
        which makes the default ``../config.yaml`` work no matter where the
        process was started from.
        """
        if os.path.isabs(config_path) or os.path.exists(config_path):
            return config_path
        module_dir = os.path.dirname(os.path.abspath(__file__))
        candidate = os.path.normpath(os.path.join(module_dir, config_path))
        return candidate if os.path.exists(candidate) else config_path

    def _load_config(self, config_path: str) -> Dict[str, Any]:
        """Load configuration from YAML file.

        Falls back to the default configuration when the file is missing,
        empty, or does not contain a mapping at the top level. Missing keys
        in the ``interface`` section are filled in from the defaults because
        ``create_interface`` requires ``title``, ``description`` and
        ``theme``.

        Args:
            config_path: Path to the YAML configuration file.

        Returns:
            The configuration dictionary.
        """
        defaults = self._get_default_config()
        resolved_path = self._resolve_config_path(config_path)

        try:
            with open(resolved_path, 'r', encoding='utf-8') as f:
                loaded = yaml.safe_load(f)
        except FileNotFoundError:
            logger.warning(f"Config file not found: {config_path}. Using defaults.")
            return defaults

        # yaml.safe_load returns None for an empty document and may return a
        # list or scalar for other content; neither is usable as a config.
        if not isinstance(loaded, dict):
            logger.warning(
                "Config file %s does not contain a mapping. Using defaults.",
                resolved_path,
            )
            return defaults

        interface = loaded.get('interface')
        if not isinstance(interface, dict):
            interface = {}
        loaded['interface'] = {**defaults['interface'], **interface}
        return loaded

    def _get_default_config(self) -> Dict[str, Any]:
        """Get default configuration."""
        return {
            'interface': {
                'title': 'SPIRAL: Interactive Reasoning Game Simulator',
                'description': 'Play games against AI and explore reasoning capabilities',
                'theme': 'default',
            },
            'games': {
                'kuhn_poker': {'name': 'Kuhn Poker'},
                'tictactoe': {'name': 'TicTacToe'},
            },
        }

    def setup_logging(self):
        """Set the root logger level from the ``logging.level`` config entry.

        Level names are matched case-insensitively; integer levels are used
        as-is. An unknown level falls back to ``INFO`` with a warning instead
        of raising.
        """
        log_config = self.config.get('logging') or {}
        raw_level = log_config.get('level', 'INFO')

        if isinstance(raw_level, int) and not isinstance(raw_level, bool):
            level = raw_level
        else:
            level = getattr(logging, str(raw_level).upper(), None)
            # getattr can also find non-level attributes (e.g. functions),
            # so insist on an integer level constant.
            if not isinstance(level, int) or isinstance(level, bool):
                logger.warning(
                    "Unknown logging level %r. Falling back to INFO.", raw_level
                )
                level = logging.INFO

        logging.getLogger().setLevel(level)

    def play_game(self, game_type: str, user_move: str, game_state: str = "") -> Tuple[str, str, str]:
        """
        Handle game play interaction.

        Args:
            game_type: Type of game (kuhn_poker, tictactoe)
            user_move: User's move input
            game_state: Current game state

        Returns:
            Tuple of (updated_game_state, ai_response, reasoning_trace)
        """
        # Placeholder implementation - will be completed in Phase 2
        if not user_move:
            return game_state, "Please enter a move!", ""

        # Simulate AI response
        ai_response = f"AI responds to your move: {user_move}"
        reasoning_trace = f"AI thinking: Analyzing move '{user_move}' in {game_type}..."
        updated_state = f"{game_state}\nUser: {user_move}\nAI: {ai_response}"

        return updated_state, ai_response, reasoning_trace

    def test_reasoning(self, prompt: str, task_type: str = "math") -> Tuple[str, str]:
        """
        Test AI reasoning on non-game tasks.

        Args:
            prompt: User's reasoning prompt
            task_type: Type of reasoning task (currently unused by the
                placeholder implementation)

        Returns:
            Tuple of (response, reasoning_trace)
        """
        # Placeholder implementation - will be completed in Phase 2
        if not prompt:
            return "Please enter a reasoning prompt!", ""

        response = f"AI response to: {prompt}"
        reasoning_trace = f"Step-by-step reasoning for '{prompt}'..."

        return response, reasoning_trace

    def create_interface(self) -> gr.Blocks:
        """Create the main Gradio interface.

        Returns:
            The assembled ``gr.Blocks`` app with the game play, reasoning
            test and about tabs wired to ``play_game`` and ``test_reasoning``.
        """
        interface_config = self.config['interface']
        title = interface_config['title']
        description = interface_config['description']

        with gr.Blocks(title=title, theme=interface_config['theme']) as demo:
            gr.Markdown(f"# {title}")
            gr.Markdown(description)

            with gr.Tabs():
                # Game Play Tab
                with gr.TabItem("🎮 Game Play"):
                    gr.Markdown("### Play zero-sum games against AI")

                    with gr.Row():
                        with gr.Column():
                            game_selector = gr.Dropdown(
                                choices=["kuhn_poker", "tictactoe"],
                                value="kuhn_poker",
                                label="Select Game"
                            )
                            user_move = gr.Textbox(
                                label="Your Move",
                                placeholder="Enter your move..."
                            )
                            play_button = gr.Button("Play Move", variant="primary")

                        with gr.Column():
                            game_state = gr.Textbox(
                                label="Game State",
                                lines=10,
                                interactive=False
                            )
                            ai_response = gr.Textbox(
                                label="AI Response",
                                lines=3,
                                interactive=False
                            )

                    reasoning_trace = gr.Textbox(
                        label="AI Reasoning Trace",
                        lines=5,
                        interactive=False
                    )

                    # game_state is both an input and an output so the
                    # transcript accumulates across moves.
                    play_button.click(
                        fn=self.play_game,
                        inputs=[game_selector, user_move, game_state],
                        outputs=[game_state, ai_response, reasoning_trace]
                    )

                # Reasoning Test Tab
                with gr.TabItem("🧠 Reasoning Test"):
                    gr.Markdown("### Test AI reasoning on math and logic problems")

                    with gr.Row():
                        with gr.Column():
                            task_type = gr.Dropdown(
                                choices=["math", "logic", "strategic"],
                                value="math",
                                label="Task Type"
                            )
                            reasoning_prompt = gr.Textbox(
                                label="Reasoning Prompt",
                                placeholder="Enter a math problem or logic puzzle...",
                                lines=3
                            )
                            test_button = gr.Button("Test Reasoning", variant="primary")

                        with gr.Column():
                            reasoning_response = gr.Textbox(
                                label="AI Response",
                                lines=8,
                                interactive=False
                            )
                            reasoning_steps = gr.Textbox(
                                label="Step-by-Step Reasoning",
                                lines=8,
                                interactive=False
                            )

                    test_button.click(
                        fn=self.test_reasoning,
                        inputs=[reasoning_prompt, task_type],
                        outputs=[reasoning_response, reasoning_steps]
                    )

                # About Tab
                with gr.TabItem("ℹ️ About"):
                    gr.Markdown("""
                    ### About SPIRAL

                    This tool demonstrates the SPIRAL methodology: "Self-Play on Zero-Sum Games
                    Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning."

                    **Key Features:**
                    - **Game Play**: Interactive games with AI opponents
                    - **Reasoning Traces**: Transparent AI decision-making
                    - **Transfer Learning**: Test reasoning on non-game tasks
                    - **Educational**: Learn about AI reasoning capabilities

                    **How it works:**
                    1. AI agents are trained via self-play on zero-sum games
                    2. Role-conditioned advantage estimation improves learning
                    3. Reasoning skills transfer to mathematical and logical tasks
                    4. Interactive interface shows the AI's thinking process

                    **Games Available:**
                    - **Kuhn Poker**: Simple poker variant with betting
                    - **TicTacToe**: Classic strategy game

                    **Technical Details:**
                    - Base Model: Qwen-4B from Hugging Face
                    - Training: PPO with self-play
                    - Interface: Gradio web app
                    """)

        return demo

    def launch(self, **kwargs):
        """Launch the Gradio app.

        Launch settings come from the ``interface.gradio`` config section and
        can be overridden through ``kwargs``.

        Args:
            **kwargs: Extra keyword arguments forwarded to
                ``gr.Blocks.launch``. ``enable_queue`` is accepted for
                backward compatibility but is applied via ``demo.queue()``
                rather than being forwarded.
        """
        demo = self.create_interface()

        # Get launch configuration
        gradio_config = (self.config.get('interface') or {}).get('gradio') or {}

        launch_kwargs = {
            'server_name': gradio_config.get('server_name', '0.0.0.0'),
            'server_port': gradio_config.get('server_port', 7860),
            'share': gradio_config.get('share', False),
            'inbrowser': gradio_config.get('inbrowser', True),
            **kwargs
        }

        # ``enable_queue`` is not a launch() argument in current Gradio
        # releases; passing it raises TypeError. Queuing is enabled on the
        # Blocks object instead.
        enable_queue = launch_kwargs.pop(
            'enable_queue', gradio_config.get('enable_queue', True)
        )
        if enable_queue:
            demo.queue()

        logger.info(
            f"Launching SPIRAL app with config: {launch_kwargs} "
            f"(queue enabled: {bool(enable_queue)})"
        )
        demo.launch(**launch_kwargs)
|
| 248 |
+
|
| 249 |
+
def main():
    """Build the SPIRAL app with its default configuration and start serving it."""
    SpiralApp().launch()
|
| 253 |
+
|
| 254 |
+
if __name__ == "__main__":
|
| 255 |
+
main()
|
config.yaml
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPIRAL Interactive Reasoning Game Simulator Configuration
|
| 2 |
+
|
| 3 |
+
# Model Configuration
|
| 4 |
+
model:
|
| 5 |
+
name: "Qwen/Qwen2.5-4B-Instruct"
|
| 6 |
+
max_length: 2048
|
| 7 |
+
temperature: 0.7
|
| 8 |
+
do_sample: true
|
| 9 |
+
quantization:
|
| 10 |
+
load_in_4bit: true
|
| 11 |
+
bnb_4bit_compute_dtype: "float16"
|
| 12 |
+
bnb_4bit_use_double_quant: true
|
| 13 |
+
|
| 14 |
+
# Games Configuration
|
| 15 |
+
games:
|
| 16 |
+
kuhn_poker:
|
| 17 |
+
name: "Kuhn Poker"
|
| 18 |
+
max_rounds: 50
|
| 19 |
+
deck_size: 3
|
| 20 |
+
betting_rounds: 2
|
| 21 |
+
|
| 22 |
+
tictactoe:
|
| 23 |
+
name: "TicTacToe"
|
| 24 |
+
board_size: 3
|
| 25 |
+
max_moves: 9
|
| 26 |
+
win_condition: 3
|
| 27 |
+
|
| 28 |
+
# Training Configuration
|
| 29 |
+
training:
|
| 30 |
+
algorithm: "PPO"
|
| 31 |
+
episodes: 1000
|
| 32 |
+
batch_size: 32
|
| 33 |
+
learning_rate: 0.0003
|
| 34 |
+
gamma: 0.99
|
| 35 |
+
gae_lambda: 0.95
|
| 36 |
+
clip_range: 0.2
|
| 37 |
+
entropy_coef: 0.01
|
| 38 |
+
value_loss_coef: 0.5
|
| 39 |
+
max_grad_norm: 0.5
|
| 40 |
+
|
| 41 |
+
# Self-play specific
|
| 42 |
+
self_play:
|
| 43 |
+
update_opponent_every: 100
|
| 44 |
+
opponent_pool_size: 5
|
| 45 |
+
|
| 46 |
+
# Role-conditioned advantage estimation
|
| 47 |
+
rae:
|
| 48 |
+
enable: true
|
| 49 |
+
role_embedding_dim: 64
|
| 50 |
+
advantage_weighting: 0.5
|
| 51 |
+
|
| 52 |
+
# Reasoning Configuration
|
| 53 |
+
reasoning:
|
| 54 |
+
enable_traces: true
|
| 55 |
+
trace_depth: 3
|
| 56 |
+
chain_of_thought: true
|
| 57 |
+
explanation_length: 150
|
| 58 |
+
|
| 59 |
+
# Transfer learning evaluation
|
| 60 |
+
transfer_tasks:
|
| 61 |
+
- "GSM8K"
|
| 62 |
+
- "Logic Puzzles"
|
| 63 |
+
- "Strategic Reasoning"
|
| 64 |
+
|
| 65 |
+
# Web Interface Configuration
|
| 66 |
+
interface:
|
| 67 |
+
title: "SPIRAL: Interactive Reasoning Game Simulator"
|
| 68 |
+
description: "Play games against AI and explore reasoning capabilities"
|
| 69 |
+
theme: "default"
|
| 70 |
+
|
| 71 |
+
# Gradio settings
|
| 72 |
+
gradio:
|
| 73 |
+
share: false
|
| 74 |
+
inbrowser: true
|
| 75 |
+
server_name: "0.0.0.0"
|
| 76 |
+
server_port: 7860
|
| 77 |
+
enable_queue: true
|
| 78 |
+
max_threads: 4
|
| 79 |
+
|
| 80 |
+
# Logging Configuration
|
| 81 |
+
logging:
|
| 82 |
+
level: "INFO"
|
| 83 |
+
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
| 84 |
+
file: "logs/spiral.log"
|
| 85 |
+
|
| 86 |
+
# Experiment tracking
|
| 87 |
+
wandb:
|
| 88 |
+
enable: false
|
| 89 |
+
project: "spiral-reasoning"
|
| 90 |
+
entity: "your-username"
|
| 91 |
+
|
| 92 |
+
tensorboard:
|
| 93 |
+
enable: true
|
| 94 |
+
log_dir: "logs/tensorboard"
|
| 95 |
+
|
| 96 |
+
# Data Configuration
|
| 97 |
+
data:
|
| 98 |
+
cache_dir: "data/cache"
|
| 99 |
+
datasets_dir: "data/datasets"
|
| 100 |
+
models_dir: "models"
|
| 101 |
+
|
| 102 |
+
# Benchmark datasets
|
| 103 |
+
benchmarks:
|
| 104 |
+
gsm8k: "data/benchmarks/gsm8k.json"
|
| 105 |
+
logic_puzzles: "data/benchmarks/logic_puzzles.json"
|
| 106 |
+
|
| 107 |
+
# Deployment Configuration
|
| 108 |
+
deployment:
|
| 109 |
+
huggingface:
|
| 110 |
+
space_name: "kaushikvr06/reasoning-simulator"
|
| 111 |
+
private: false
|
| 112 |
+
|
| 113 |
+
# Performance settings
|
| 114 |
+
performance:
|
| 115 |
+
max_concurrent_users: 10
|
| 116 |
+
timeout_seconds: 30
|
| 117 |
+
memory_limit: "2GB"
|
| 118 |
+
|
| 119 |
+
# Debug Configuration
|
| 120 |
+
debug:
|
| 121 |
+
enable: false
|
| 122 |
+
verbose_traces: false
|
| 123 |
+
save_game_logs: true
|
| 124 |
+
profile_inference: false
|
data/README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPIRAL Data Directory
|
| 2 |
+
|
| 3 |
+
This directory contains datasets, benchmarks, and cached data for the SPIRAL Interactive Reasoning Game Simulator.
|
| 4 |
+
|
| 5 |
+
## Structure
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
data/
|
| 9 |
+
├── cache/ # Cached model outputs and processed data
|
| 10 |
+
├── datasets/ # Game datasets and training data
|
| 11 |
+
├── benchmarks/ # Evaluation benchmarks for transfer learning
|
| 12 |
+
│ ├── gsm8k.json # GSM8K math problems
|
| 13 |
+
│ └── logic_puzzles.json # Logic reasoning puzzles
|
| 14 |
+
└── README.md # This file
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
## Datasets
|
| 18 |
+
|
| 19 |
+
### Game Datasets
|
| 20 |
+
- **Kuhn Poker**: Training games and strategies
|
| 21 |
+
- **TicTacToe**: Game states and optimal moves
|
| 22 |
+
|
| 23 |
+
### Benchmark Datasets
|
| 24 |
+
- **GSM8K**: Grade School Math 8K dataset for mathematical reasoning
|
| 25 |
+
- **Logic Puzzles**: Custom logic and reasoning problems
|
| 26 |
+
- **Strategic Reasoning**: Game-theory based reasoning tasks
|
| 27 |
+
|
| 28 |
+
## Usage
|
| 29 |
+
|
| 30 |
+
Datasets are automatically downloaded and cached on first use. To download them manually:
|
| 31 |
+
|
| 32 |
+
```python
|
| 33 |
+
from src.data_utils import download_datasets
|
| 34 |
+
download_datasets()
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
## Data Sources
|
| 38 |
+
|
| 39 |
+
- GSM8K: [Cobbe et al. 2021](https://arxiv.org/abs/2110.14168)
|
| 40 |
+
- Logic Puzzles: Curated collection from various sources
|
| 41 |
+
- Game Data: Generated through self-play training
|
| 42 |
+
|
| 43 |
+
## License
|
| 44 |
+
|
| 45 |
+
Please refer to individual dataset licenses for usage rights.
|
execution-plan.md
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPIRAL Demo App Execution Plan
|
| 2 |
+
|
| 3 |
+
This execution plan outlines the development of a practical, interactive tool on Hugging Face Spaces based on the SPIRAL paper ("Self-Play on Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning"). The tool will be an **Interactive Reasoning Game Simulator**: Users can play zero-sum games (e.g., Kuhn Poker, TicTacToe) against a self-play trained AI, view step-by-step reasoning traces, and test the AI's transferred reasoning skills on non-game tasks like math problems or logic puzzles.
|
| 4 |
+
|
| 5 |
+
**Utility Focus**:
|
| 6 |
+
- **Non-Technical Users**: Simple web interface to play games, learn about AI reasoning through visualizations, and experiment with prompts for educational fun (e.g., "How does AI think in games?").
|
| 7 |
+
- **Technical Users**: Access to model weights, training scripts, and APIs for extending the self-play system (e.g., custom games or fine-tuning).
|
| 8 |
+
- **Practicality**: Free to use, no setup required; demonstrates real-world AI applications in strategy, education, and decision-making. Aims for broad appeal: 1000+ users via HF community sharing.
|
| 9 |
+
|
| 10 |
+
The plan is divided into phases with checkboxes for sub-tasks. Each phase includes detailed "how" steps.
|
| 11 |
+
|
| 12 |
+
## Phase 1: Research and Planning
|
| 13 |
+
- [ ] Review SPIRAL Paper and Gather Resources
|
| 14 |
+
- How: Read the full paper (use attached snips as reference). Identify key components: self-play RL on games like Kuhn Poker, role-conditioned advantage estimation (RAE), multi-agent multi-turn training. Download base models (e.g., Qwen-4B from HF) and RL libs (Gym, Stable Baselines). Collect datasets: Simple game rules/implementations from GitHub; math benchmarks like GSM8K for transfer testing.
|
| 15 |
+
- [ ] Define Tool Features
|
| 16 |
+
- How: Brainstorm user flows. Core: Game mode (user vs. AI play), Reasoning Viewer (display traces), Transfer Tester (input math/logic queries). Add tutorials for non-tech users, exportable logs for tech users. Ensure accessibility: Mobile-friendly UI, low-latency inference.
|
| 17 |
+
- [ ] Scope Requirements and Tech Stack
|
| 18 |
+
- How: Choose Python for the backend and Gradio for the HF Spaces UI (easy interactive elements like buttons for moves). Use Transformers for the LLM, Gymnasium for games, and PPO from Stable-Baselines3 for the RL demo. Estimate: 1-2 weeks dev time, free HF tier for hosting (upgrade to GPU if needed for training demos).
|
| 19 |
+
|
| 20 |
+
## Phase 2: Implementation
|
| 21 |
+
- [ ] Set Up Project Structure
|
| 22 |
+
- How: Create a Git repo. Folders: `src/` for code, `models/` for weights, `data/` for game datasets, `app/` for Gradio script. Initialize with `requirements.txt`: transformers, torch, gymnasium, stable-baselines3, gradio.
|
| 23 |
+
- [ ] Implement Game Environments
|
| 24 |
+
- How: Code Gym envs for Kuhn Poker/TicTacToe (e.g., class KuhnPokerEnv(gym.Env) with action_space, observation_space, reward for wins). Add multi-turn logic: Track game state, player turns.
|
| 25 |
+
- [ ] Train SPIRAL Model
|
| 26 |
+
- How: Load base LLM (Qwen-4B). Implement self-play: Clone agent, train via PPO with RAE (custom advantage function: advantage = reward + value - baseline, conditioned on roles like 'player' vs. 'opponent'). Train on 1000+ episodes (simulate self-improvement). Save checkpoints to HF Model Hub.
|
| 27 |
+
- [ ] Build Reasoning and Transfer Components
|
| 28 |
+
- How: For games, generate traces (e.g., "Opponent bet high → Likely strong hand → Fold"). For transfer, prompt model with math tasks post-training. Use chain-of-thought prompting for visibility.
|
| 29 |
+
- [ ] Develop User Interface
|
| 30 |
+
- How: Use Gradio Blocks: Tab 1: Game Play (dropdown for game, text input for moves, output panel for AI response/trace). Tab 2: Tester (input prompt, show output). Add buttons for "Explain Reasoning" and "Export Session". Style with CSS for modern UX (e.g., cards, animations).
|
| 31 |
+
|
| 32 |
+
## Phase 3: Testing and Optimization
|
| 33 |
+
- [ ] Unit and Integration Testing
|
| 34 |
+
- How: Test game logic (e.g., assert win conditions). Run self-play simulations to verify improvements (e.g., win rate >50% after training). Use pytest for automation.
|
| 35 |
+
- [ ] User Testing
|
| 36 |
+
- How: Simulate non-tech users (play games, check intuitiveness). For tech users, test API endpoints. Gather feedback via HF Spaces comments or a built-in form. Measure metrics: Latency <2s per move, accuracy on benchmarks (+8% as per paper).
|
| 37 |
+
- [ ] Optimize for HF Spaces
|
| 38 |
+
- How: Profile CPU/GPU usage; use model quantization (e.g., bitsandbytes) for faster inference. Ensure the build never waits on an interactive prompt (e.g., pass --yes to install commands).
|
| 39 |
+
|
| 40 |
+
## Phase 4: Deployment and Documentation
|
| 41 |
+
- [ ] Deploy to Hugging Face Spaces
|
| 42 |
+
- How: Create a Space and upload the repo via Git. Set the entry point (the `app_file` field in the README metadata) to the Gradio script `app/app.py`. Enable public access, add tags like "AI", "Games", "Reasoning" for discoverability.
|
| 43 |
+
- [ ] Create Documentation and Tutorials
|
| 44 |
+
- How: Write README.md with paper summary, usage guide (screenshots), and code explanations. Add in-app help: Tooltips for buttons, video demo. For tech users: Include training scripts and extension guides.
|
| 45 |
+
- [ ] Launch and Promote
|
| 46 |
+
- How: Share on HF forums, Reddit (r/MachineLearning), Twitter. Monitor usage via HF analytics; iterate based on feedback (e.g., add more games).
|
| 47 |
+
|
| 48 |
+
## Phase 5: Maintenance and Iteration
|
| 49 |
+
- [ ] Monitor and Update
|
| 50 |
+
- How: Check for issues (e.g., via GitHub Issues). Update model with new games or better RL algos. Aim for v2: Multimodal (add image-based games).
|
| 51 |
+
- [ ] Measure Impact
|
| 52 |
+
- How: Track metrics: User sessions, feedback ratings. Goal: 1000+ interactions in first month, positive reviews highlighting educational value.
|
| 53 |
+
|
| 54 |
+
This plan ensures a useful tool that's easy to use, educational, and extensible.
|
requirements.txt
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core ML and Deep Learning
|
| 2 |
+
torch>=2.0.0
|
| 3 |
+
transformers>=4.30.0
|
| 4 |
+
accelerate>=0.20.0
|
| 5 |
+
bitsandbytes>=0.41.0
|
| 6 |
+
|
| 7 |
+
# Reinforcement Learning
|
| 8 |
+
gymnasium>=0.28.0
|
| 9 |
+
stable-baselines3>=2.0.0
|
| 10 |
+
sb3-contrib>=2.0.0
|
| 11 |
+
|
| 12 |
+
# Web Interface
|
| 13 |
+
gradio>=4.0.0
|
| 14 |
+
|
| 15 |
+
# Data Processing and Utilities
|
| 16 |
+
numpy>=1.21.0
|
| 17 |
+
pandas>=1.3.0
|
| 18 |
+
matplotlib>=3.5.0
|
| 19 |
+
seaborn>=0.11.0
|
| 20 |
+
plotly>=5.0.0
|
| 21 |
+
|
| 22 |
+
# Game Theory and Math
|
| 23 |
+
scipy>=1.7.0
|
| 24 |
+
networkx>=2.6.0
|
| 25 |
+
|
| 26 |
+
# Model Management
|
| 27 |
+
huggingface-hub>=0.16.0
|
| 28 |
+
datasets>=2.10.0
|
| 29 |
+
|
| 30 |
+
# Testing and Development
|
| 31 |
+
pytest>=7.0.0
|
| 32 |
+
pytest-cov>=4.0.0
|
| 33 |
+
black>=22.0.0
|
| 34 |
+
flake8>=5.0.0
|
| 35 |
+
|
| 36 |
+
# Logging and Monitoring
|
| 37 |
+
wandb>=0.15.0
|
| 38 |
+
tensorboard>=2.10.0
|
| 39 |
+
|
| 40 |
+
# Utilities
|
| 41 |
+
tqdm>=4.64.0
|
| 42 |
+
pyyaml>=6.0.0
|
| 43 |
+
python-dotenv>=1.0.0
|
| 44 |
+
requests>=2.28.0
|
src/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SPIRAL: Interactive Reasoning Game Simulator
|
| 3 |
+
|
| 4 |
+
A practical tool demonstrating how self-play training on zero-sum games
|
| 5 |
+
can improve AI reasoning capabilities.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
__version__ = "0.1.0"
|
| 9 |
+
__author__ = "SPIRAL Team"
|
| 10 |
+
__email__ = "contact@spiral-reasoning.com"
|
| 11 |
+
|
| 12 |
+
from .games import *
|
| 13 |
+
from .models import *
|
| 14 |
+
from .training import *
|
| 15 |
+
from .reasoning import *
|
src/games/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Game environments for SPIRAL reasoning simulator.
|
| 3 |
+
|
| 4 |
+
This module contains implementations of zero-sum games used for self-play training,
|
| 5 |
+
including Kuhn Poker, TicTacToe, and other strategic games.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from .kuhn_poker import KuhnPokerEnv
|
| 9 |
+
from .tictactoe import TicTacToeEnv
|
| 10 |
+
from .base_game import BaseGameEnv
|
| 11 |
+
|
| 12 |
+
__all__ = ["KuhnPokerEnv", "TicTacToeEnv", "BaseGameEnv"]
|
src/models/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Model implementations for SPIRAL reasoning simulator.
|
| 3 |
+
|
| 4 |
+
This module contains the SPIRAL model architecture, role-conditioned advantage
|
| 5 |
+
estimation, and other model components for self-play training.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from .spiral_model import SpiralModel
|
| 9 |
+
from .rae import RoleConditionedAdvantageEstimator
|
| 10 |
+
from .policy_network import PolicyNetwork
|
| 11 |
+
from .value_network import ValueNetwork
|
| 12 |
+
|
| 13 |
+
__all__ = ["SpiralModel", "RoleConditionedAdvantageEstimator", "PolicyNetwork", "ValueNetwork"]
|
src/reasoning/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Reasoning components for SPIRAL reasoning simulator.
|
| 3 |
+
|
| 4 |
+
This module contains reasoning trace generation, chain-of-thought processing,
|
| 5 |
+
and transfer learning evaluation for testing reasoning capabilities.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from .trace_generator import TraceGenerator
|
| 9 |
+
from .chain_of_thought import ChainOfThought
|
| 10 |
+
from .transfer_evaluator import TransferEvaluator
|
| 11 |
+
from .reasoning_utils import ReasoningUtils
|
| 12 |
+
|
| 13 |
+
__all__ = ["TraceGenerator", "ChainOfThought", "TransferEvaluator", "ReasoningUtils"]
|
src/training/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Training components for SPIRAL reasoning simulator.
|
| 3 |
+
|
| 4 |
+
This module contains the self-play training logic, PPO implementation with
|
| 5 |
+
role-conditioned advantage estimation, and training utilities.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from .self_play_trainer import SelfPlayTrainer
|
| 9 |
+
from .ppo_trainer import PPOTrainer
|
| 10 |
+
from .opponent_manager import OpponentManager
|
| 11 |
+
from .training_utils import TrainingUtils
|
| 12 |
+
|
| 13 |
+
__all__ = ["SelfPlayTrainer", "PPOTrainer", "OpponentManager", "TrainingUtils"]
|
tests/test_basic.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Basic tests for SPIRAL Interactive Reasoning Game Simulator.
|
| 3 |
+
|
| 4 |
+
This module contains fundamental tests to verify the core functionality
|
| 5 |
+
of the SPIRAL system components.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import pytest
|
| 9 |
+
import os
|
| 10 |
+
import sys
|
| 11 |
+
import yaml
|
| 12 |
+
|
| 13 |
+
# Add the src directory to the path for imports
|
| 14 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 15 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'app'))
|
| 16 |
+
|
| 17 |
+
from app import SpiralApp
|
| 18 |
+
|
| 19 |
+
class TestSpiralApp:
    """Smoke tests for the ``SpiralApp`` entry point.

    Each test builds a fresh ``SpiralApp`` and checks only the public
    surface used by the UI: the loaded config, ``play_game``,
    ``test_reasoning`` and ``create_interface``.
    """

    def test_app_initialization(self):
        """A new app exists and exposes its config and handler methods."""
        spiral = SpiralApp()
        assert spiral is not None
        for attribute in ('config', 'play_game', 'test_reasoning'):
            assert hasattr(spiral, attribute)

    def test_config_loading(self):
        """The loaded config has interface and games sections and a title."""
        settings = SpiralApp().config
        assert 'interface' in settings
        assert 'games' in settings
        assert settings['interface']['title'] is not None

    def test_play_game_basic(self):
        """``play_game`` echoes a valid move and rejects an empty one."""
        spiral = SpiralApp()

        # A real move must yield all three outputs and show up in the state.
        game_state, reply, reasoning = spiral.play_game("kuhn_poker", "bet", "")
        for produced in (game_state, reply, reasoning):
            assert produced is not None
        assert "bet" in game_state

        # An empty move must be answered with the prompt to enter one.
        _, reply, _ = spiral.play_game("kuhn_poker", "", "")
        assert "Please enter a move!" in reply

    def test_reasoning_basic(self):
        """``test_reasoning`` echoes a valid prompt and rejects an empty one."""
        spiral = SpiralApp()

        # A real prompt must yield both outputs and be reflected in the reply.
        reply, reasoning = spiral.test_reasoning("What is 2+2?", "math")
        assert reply is not None
        assert reasoning is not None
        assert "2+2" in reply

        # An empty prompt must be answered with the request for one.
        reply, _ = spiral.test_reasoning("", "math")
        assert "Please enter a reasoning prompt!" in reply

    def test_interface_creation(self):
        """``create_interface`` returns a (non-None) Gradio interface object."""
        interface = SpiralApp().create_interface()
        assert interface is not None
|
| 71 |
+
|
| 72 |
+
class TestConfiguration:
    """Test cases for configuration management."""

    def test_config_file_structure(self):
        """Check that config.yaml has the sections and keys the app expects.

        The file is looked up one directory above this test module. If it
        is absent the test is reported as skipped instead of silently
        passing, so a missing config is visible in the test summary.
        """
        config_path = os.path.join(os.path.dirname(__file__), '..', 'config.yaml')

        if not os.path.exists(config_path):
            pytest.skip("config.yaml not found at " + config_path)

        # Explicit encoding keeps the result independent of the platform locale.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)

        # An empty YAML document loads as None; fail with a clear message
        # rather than a TypeError from the membership tests below.
        assert isinstance(config, dict), "config.yaml must contain a mapping"

        # Check required sections
        for section in ('model', 'games', 'training', 'reasoning', 'interface'):
            assert section in config

        # Check model configuration
        assert 'name' in config['model']
        assert 'max_length' in config['model']

        # Check games configuration
        assert 'kuhn_poker' in config['games']
        assert 'tictactoe' in config['games']
|
| 97 |
+
|
| 98 |
+
class TestProjectStructure:
    """Checks that the repository layout matches what the project expects.

    All paths are resolved relative to the directory containing this test
    module, going one level up to reach the project root.
    """

    def test_src_directory_structure(self):
        """The ``src`` tree contains every sub-package and its ``__init__.py``."""
        src_root = os.path.join(os.path.dirname(__file__), '..', 'src')
        subpackages = ('games', 'models', 'training', 'reasoning')

        # Each sub-package directory must be present.
        for package in subpackages:
            assert os.path.exists(os.path.join(src_root, package))

        # The top-level package and every sub-package need an __init__.py.
        assert os.path.exists(os.path.join(src_root, '__init__.py'))
        for package in subpackages:
            assert os.path.exists(os.path.join(src_root, package, '__init__.py'))

    def test_required_files_exist(self):
        """The essential top-level project files are present."""
        repo_root = os.path.join(os.path.dirname(__file__), '..')

        # Path components of every file that must exist, relative to the root.
        essential_files = (
            ('requirements.txt',),
            ('README.md',),
            ('config.yaml',),
            ('.gitignore',),
            ('app', 'app.py'),
        )
        for components in essential_files:
            assert os.path.exists(os.path.join(repo_root, *components))
|
| 128 |
+
|
| 129 |
+
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; hand it to the interpreter so
    # that running this file directly reports failures to the shell or CI
    # instead of always exiting with status 0.
    sys.exit(pytest.main([__file__]))
|