Spaces:
Sleeping
feat: implement complete Bayesian Game with domain-driven architecture
Browse files
- Add Environment Domain with EnvironmentEvidence dataclass and Environment class for pure evidence generation
- Add Belief Domain with BeliefUpdate dataclass and BayesianBeliefState class for Bayesian inference
- Add Game Coordination with GameState dataclass and BayesianGame orchestration class
- Add Gradio web interface with real-time belief visualization and game controls
- Implement proper information filtering: belief agent receives only comparison results, not dice values
- Add comprehensive test suite (78 tests) including architectural constraint verification
- Add memory leak prevention and graceful game completion in UI
- Support configurable dice sides and round counts with reproducible seeded experiments
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
- .gitignore +191 -0
- CLAUDE.md +107 -0
- README.md +242 -0
- app.py +24 -0
- domains/__init__.py +1 -0
- domains/belief/__init__.py +1 -0
- domains/belief/belief_domain.py +123 -0
- domains/coordination/__init__.py +1 -0
- domains/coordination/game_coordination.py +193 -0
- domains/environment/__init__.py +1 -0
- domains/environment/environment_domain.py +87 -0
- requirements.txt +4 -0
- tests/__init__.py +1 -0
- tests/test_architectural_constraints.py +159 -0
- tests/test_belief_domain.py +295 -0
- tests/test_environment_domain.py +187 -0
- tests/test_game_coordination.py +351 -0
- tests/test_ui_interface.py +243 -0
- ui/__init__.py +1 -0
- ui/gradio_interface.py +370 -0
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# poetry
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 102 |
+
#poetry.lock
|
| 103 |
+
|
| 104 |
+
# pdm
|
| 105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 106 |
+
#pdm.lock
|
| 107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 108 |
+
# in version control.
|
| 109 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 110 |
+
.pdm.toml
|
| 111 |
+
|
| 112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 113 |
+
__pypackages__/
|
| 114 |
+
|
| 115 |
+
# Celery stuff
|
| 116 |
+
celerybeat-schedule
|
| 117 |
+
celerybeat.pid
|
| 118 |
+
|
| 119 |
+
# SageMath parsed files
|
| 120 |
+
*.sage.py
|
| 121 |
+
|
| 122 |
+
# Environments
|
| 123 |
+
.env
|
| 124 |
+
.venv
|
| 125 |
+
env/
|
| 126 |
+
venv/
|
| 127 |
+
ENV/
|
| 128 |
+
env.bak/
|
| 129 |
+
venv.bak/
|
| 130 |
+
|
| 131 |
+
# Spyder project settings
|
| 132 |
+
.spyderproject
|
| 133 |
+
.spyproject
|
| 134 |
+
|
| 135 |
+
# Rope project settings
|
| 136 |
+
.ropeproject
|
| 137 |
+
|
| 138 |
+
# mkdocs documentation
|
| 139 |
+
/site
|
| 140 |
+
|
| 141 |
+
# mypy
|
| 142 |
+
.mypy_cache/
|
| 143 |
+
.dmypy.json
|
| 144 |
+
dmypy.json
|
| 145 |
+
|
| 146 |
+
# Pyre type checker
|
| 147 |
+
.pyre/
|
| 148 |
+
|
| 149 |
+
# pytype static type analyzer
|
| 150 |
+
.pytype/
|
| 151 |
+
|
| 152 |
+
# Cython debug symbols
|
| 153 |
+
cython_debug/
|
| 154 |
+
|
| 155 |
+
# PyCharm
|
| 156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 157 |
+
# be added to the global gitignore or merged into this project gitignore. For a PyCharm
|
| 158 |
+
# project, it is not recommended to check the .gitignore file into the git repo
|
| 159 |
+
# but consider adding it to the global gitignore or setting up git config properly
|
| 160 |
+
# for your development environment.
|
| 161 |
+
.idea/
|
| 162 |
+
|
| 163 |
+
# VS Code
|
| 164 |
+
.vscode/
|
| 165 |
+
|
| 166 |
+
# macOS
|
| 167 |
+
.DS_Store
|
| 168 |
+
.DS_Store?
|
| 169 |
+
._*
|
| 170 |
+
.Spotlight-V100
|
| 171 |
+
.Trashes
|
| 172 |
+
ehthumbs.db
|
| 173 |
+
Thumbs.db
|
| 174 |
+
|
| 175 |
+
# Windows
|
| 176 |
+
Thumbs.db
|
| 177 |
+
ehthumbs.db
|
| 178 |
+
Desktop.ini
|
| 179 |
+
|
| 180 |
+
# Gradio temporary files
|
| 181 |
+
gradio_cached_examples/
|
| 182 |
+
flagged/
|
| 183 |
+
|
| 184 |
+
# Matplotlib cache
|
| 185 |
+
.matplotlib/
|
| 186 |
+
|
| 187 |
+
# Temporary files
|
| 188 |
+
*.tmp
|
| 189 |
+
*.temp
|
| 190 |
+
temp/
|
| 191 |
+
tmp/
|
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Bayesian Game Project
|
| 2 |
+
|
| 3 |
+
## Project Overview
|
| 4 |
+
A Bayesian Game implementation featuring a Belief-based Agent using domain-driven design.
|
| 5 |
+
|
| 6 |
+
## Game Rules
|
| 7 |
+
- Judge and Player 1 can see the target die value
|
| 8 |
+
- Player 2 must deduce the target value using only comparison results
|
| 9 |
+
- Player 1 rolls dice and reports "higher"/"lower"/"same" compared to target
|
| 10 |
+
- **CRITICAL**: Player 2 receives ONLY the comparison result, NOT the dice roll value
|
| 11 |
+
- Game runs for 10 rounds by default (the round count is configurable)
|
| 12 |
+
- Judge ensures truth-telling
|
| 13 |
+
|
| 14 |
+
## Development Practices
|
| 15 |
+
- Use conventional commits when committing code to git
|
| 16 |
+
|
| 17 |
+
## Architecture
|
| 18 |
+
Domain-Driven Design with 3 modules:
|
| 19 |
+
|
| 20 |
+
1. **Environment Domain** (`domains/environment/environment_domain.py`)
|
| 21 |
+
- EnvironmentEvidence dataclass (contains dice_roll AND comparison_result)
|
| 22 |
+
- Environment class for target/evidence generation
|
| 23 |
+
- **ACCESS**: Full knowledge of dice rolls and target values
|
| 24 |
+
|
| 25 |
+
2. **Belief Domain** (`domains/belief/belief_domain.py`)
|
| 26 |
+
- BeliefUpdate dataclass (contains ONLY comparison_result)
|
| 27 |
+
- BayesianBeliefState class for inference
|
| 28 |
+
- **ACCESS**: NO knowledge of dice roll values or true target
|
| 29 |
+
- **CONSTRAINT**: Must calculate P(comparison_result | target) probabilistically
|
| 30 |
+
|
| 31 |
+
3. **Game Coordination** (`domains/coordination/game_coordination.py`)
|
| 32 |
+
- GameState dataclass (tracks full game state)
|
| 33 |
+
- BayesianGame orchestration class
|
| 34 |
+
- **RESPONSIBILITY**: Filters EnvironmentEvidence to create BeliefUpdate
|
| 35 |
+
|
| 36 |
+
## Development Commands
|
| 37 |
+
- Test: `python -m pytest tests/`
|
| 38 |
+
- Run: `python app.py`
|
| 39 |
+
|
| 40 |
+
## Folder Structure
|
| 41 |
+
```
|
| 42 |
+
bayesian_game/
|
| 43 |
+
├── domains/
|
| 44 |
+
│ ├── environment/environment_domain.py
|
| 45 |
+
│ ├── belief/belief_domain.py
|
| 46 |
+
│ └── coordination/game_coordination.py
|
| 47 |
+
├── ui/gradio_interface.py
|
| 48 |
+
├── tests/
|
| 49 |
+
├── app.py # Hugging Face entry point
|
| 50 |
+
├── requirements.txt
|
| 51 |
+
└── CLAUDE.md
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
## Implementation Status
|
| 55 |
+
- ✅ Architecture implemented with proper domain separation
|
| 56 |
+
- ✅ Domain-driven design with information filtering enforced
|
| 57 |
+
- ✅ Gradio UI with graceful completion and comprehensive final results
|
| 58 |
+
- ✅ Comprehensive test suite (78 tests) ensuring architectural constraints
|
| 59 |
+
- ✅ Proper Bayesian inference without dice roll knowledge
|
| 60 |
+
- ✅ Memory leak prevention in matplotlib figure generation
|
| 61 |
+
|
| 62 |
+
## Key Design Decisions & Architectural Constraints
|
| 63 |
+
|
| 64 |
+
### Information Flow Rules
|
| 65 |
+
1. **Environment → Coordination**: EnvironmentEvidence (dice_roll + comparison_result)
|
| 66 |
+
2. **Coordination → Belief**: BeliefUpdate (comparison_result ONLY)
|
| 67 |
+
3. **NEVER**: Direct Environment → Belief communication
|
| 68 |
+
4. **NEVER**: Belief domain access to dice roll values
|
| 69 |
+
|
| 70 |
+
### Domain Separation Principles
|
| 71 |
+
- **Environment Domain**: No probability knowledge, pure evidence generation
|
| 72 |
+
- **Belief Domain**: Pure Bayesian inference, no knowledge of actual dice values
|
| 73 |
+
- **Coordination Layer**: Thin orchestration, responsible for information filtering
|
| 74 |
+
- **UI Layer**: Separate from core game logic, can display full information
|
| 75 |
+
|
| 76 |
+
### Critical Implementation Rules
|
| 77 |
+
- BeliefUpdate dataclass MUST contain only comparison_result
|
| 78 |
+
- BayesianBeliefState MUST calculate P(comparison_result | target) probabilistically
|
| 79 |
+
- Game coordination MUST filter dice_roll from EnvironmentEvidence before passing to belief domain
|
| 80 |
+
- Tests MUST verify that belief domain never receives dice roll values
|
| 81 |
+
|
| 82 |
+
## Maintaining Architectural Integrity
|
| 83 |
+
|
| 84 |
+
### Code Review Checklist
|
| 85 |
+
When modifying the codebase, ensure:
|
| 86 |
+
- [ ] BeliefUpdate contains ONLY comparison_result field
|
| 87 |
+
- [ ] No dice_roll parameter passed to belief domain methods
|
| 88 |
+
- [ ] Game coordination filters EnvironmentEvidence properly
|
| 89 |
+
- [ ] Tests verify belief domain isolation
|
| 90 |
+
- [ ] Belief calculations use probabilistic formulas, not direct dice values
|
| 91 |
+
|
| 92 |
+
### Anti-Patterns to Avoid
|
| 93 |
+
❌ `BeliefUpdate(dice_roll=X, comparison_result=Y)` - belief shouldn't know dice value
|
| 94 |
+
❌ Direct Environment-Belief communication
|
| 95 |
+
❌ Belief domain knowing actual dice roll or target values
|
| 96 |
+
❌ Hard-coded probability values instead of calculated P(comparison_result | target)
|
| 97 |
+
|
| 98 |
+
### Correct Patterns
|
| 99 |
+
✅ `BeliefUpdate(comparison_result="higher")` - only comparison result
|
| 100 |
+
✅ Environment → Coordination → Belief information flow
|
| 101 |
+
✅ Probabilistic calculations: P(roll > target) = (dice_sides - target) / dice_sides
|
| 102 |
+
✅ Clean domain boundaries with no cross-dependencies
|
| 103 |
+
|
| 104 |
+
## Dependencies
|
| 105 |
+
- gradio (for UI)
|
| 106 |
+
- numpy (for Bayesian calculations)
|
| 107 |
+
- matplotlib (for belief distribution visualization)
- pytest (for testing)
|
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🎲 Bayesian Game
|
| 2 |
+
|
| 3 |
+
A Bayesian Game implementation featuring a Belief-based Agent using domain-driven design. This interactive game demonstrates Bayesian inference in action as Player 2 attempts to deduce a hidden target die value based on evidence from dice rolls.
|
| 4 |
+
|
| 5 |
+
## 🎯 Game Overview
|
| 6 |
+
|
| 7 |
+
**The Setup:**
|
| 8 |
+
- Judge and Player 1 can see the target die value (1-6)
|
| 9 |
+
- Player 2 must deduce the target value using Bayesian inference
|
| 10 |
+
- Each round: Player 1 rolls dice and reports "higher"/"lower"/"same" compared to target
|
| 11 |
+
- **Player 2 only receives the comparison result, NOT the actual dice roll value**
|
| 12 |
+
- Game runs for 10 rounds (configurable)
|
| 13 |
+
- Judge ensures truth-telling
|
| 14 |
+
|
| 15 |
+
**The Challenge:**
|
| 16 |
+
Player 2 starts with uniform beliefs about the target value and updates their beliefs after each piece of evidence using Bayes' rule. The key insight is that Player 2 must calculate the probability that ANY dice roll would produce the observed comparison result for each possible target value.
|
| 17 |
+
|
| 18 |
+
## 🏗️ Architecture
|
| 19 |
+
|
| 20 |
+
Built using **Domain-Driven Design** with clean separation of concerns:
|
| 21 |
+
|
| 22 |
+
### 1. Environment Domain (`domains/environment/`)
|
| 23 |
+
- **Pure evidence generation** - no probability knowledge
|
| 24 |
+
- `EnvironmentEvidence`: Dataclass for dice roll results
|
| 25 |
+
- `Environment`: Generates target values and dice roll comparisons
|
| 26 |
+
|
| 27 |
+
### 2. Belief Domain (`domains/belief/`)
|
| 28 |
+
- **Pure Bayesian inference** - receives only comparison results, no dice roll values
|
| 29 |
+
- `BeliefUpdate`: Dataclass containing only comparison results
|
| 30 |
+
- `BayesianBeliefState`: Calculates likelihood P(comparison_result | target) for each possible target
|
| 31 |
+
|
| 32 |
+
### 3. Game Coordination (`domains/coordination/`)
|
| 33 |
+
- **Thin orchestration layer** - coordinates between domains
|
| 34 |
+
- `GameState`: Tracks current game state
|
| 35 |
+
- `BayesianGame`: Main game orchestration class
|
| 36 |
+
|
| 37 |
+
### 4. UI Layer (`ui/`)
|
| 38 |
+
- Interactive Gradio web interface
|
| 39 |
+
- Real-time belief visualization
|
| 40 |
+
- Game controls and statistics display
|
| 41 |
+
|
| 42 |
+
## 🚀 Quick Start
|
| 43 |
+
|
| 44 |
+
### Prerequisites
|
| 45 |
+
- Python 3.10+
|
| 46 |
+
- `uv` package manager (recommended) or `pip`
|
| 47 |
+
|
| 48 |
+
### Installation
|
| 49 |
+
|
| 50 |
+
1. **Clone and navigate to the project:**
|
| 51 |
+
```bash
|
| 52 |
+
git clone <repository-url>
|
| 53 |
+
cd bayesian_game
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
2. **Set up virtual environment:**
|
| 57 |
+
```bash
|
| 58 |
+
# Using uv (recommended)
|
| 59 |
+
uv venv
|
| 60 |
+
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
| 61 |
+
|
| 62 |
+
# Or using pip
|
| 63 |
+
python -m venv venv
|
| 64 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
3. **Install dependencies:**
|
| 68 |
+
```bash
|
| 69 |
+
# Using uv
|
| 70 |
+
uv pip install -r requirements.txt
|
| 71 |
+
|
| 72 |
+
# Or using pip
|
| 73 |
+
pip install -r requirements.txt
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### Running the Game
|
| 77 |
+
|
| 78 |
+
**Launch the interactive web interface:**
|
| 79 |
+
```bash
|
| 80 |
+
python app.py
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
The game will be available at `http://localhost:7860`
|
| 84 |
+
|
| 85 |
+
**Run from command line (for development):**
|
| 86 |
+
```python
|
| 87 |
+
from domains.coordination.game_coordination import BayesianGame
|
| 88 |
+
|
| 89 |
+
# Create and start a game
|
| 90 |
+
game = BayesianGame(seed=42)
|
| 91 |
+
game.start_new_game(target_value=3)
|
| 92 |
+
|
| 93 |
+
# Play rounds
|
| 94 |
+
for round_num in range(5):
|
| 95 |
+
state = game.play_round()
|
| 96 |
+
evidence = state.evidence_history[-1]
|
| 97 |
+
print(f"Round {round_num + 1}: Rolled {evidence.dice_roll} → {evidence.comparison_result}")
|
| 98 |
+
print(f"Most likely target: {state.most_likely_target}")
|
| 99 |
+
print(f"Belief entropy: {state.belief_entropy:.2f}")
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
## 🧪 Testing
|
| 103 |
+
|
| 104 |
+
Run the comprehensive test suite:
|
| 105 |
+
|
| 106 |
+
```bash
|
| 107 |
+
# Run all tests
|
| 108 |
+
python -m pytest tests/ -v
|
| 109 |
+
|
| 110 |
+
# Run specific domain tests
|
| 111 |
+
python -m pytest tests/test_environment_domain.py -v
|
| 112 |
+
python -m pytest tests/test_belief_domain.py -v
|
| 113 |
+
python -m pytest tests/test_game_coordination.py -v
|
| 114 |
+
|
| 115 |
+
# Run with coverage
|
| 116 |
+
python -m pytest tests/ --cov=domains --cov-report=html
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
**Test Coverage:**
|
| 120 |
+
- 78 comprehensive tests
|
| 121 |
+
- All core functionality covered
|
| 122 |
+
- Edge cases and error handling tested
|
| 123 |
+
- Reproducibility and randomness testing
|
| 124 |
+
|
| 125 |
+
## 🎮 Game Interface
|
| 126 |
+
|
| 127 |
+
The Gradio interface provides:
|
| 128 |
+
|
| 129 |
+
- **Game Controls**: Start new games, play rounds, reset settings
|
| 130 |
+
- **Real-time Visualization**: Belief probability distribution chart
|
| 131 |
+
- **Game Statistics**: Entropy, accuracy, round information
|
| 132 |
+
- **Evidence History**: Complete log of dice rolls and comparisons
|
| 133 |
+
- **Customization**: Adjustable dice sides and round count
|
| 134 |
+
|
| 135 |
+
### Interface Features
|
| 136 |
+
|
| 137 |
+
- 📊 **Belief Distribution Chart**: Visual representation of Player 2's beliefs
|
| 138 |
+
- 🎯 **Target Highlighting**: True target and most likely guess highlighted
|
| 139 |
+
- 📝 **Evidence Log**: Complete history of all dice rolls and results
|
| 140 |
+
- ⚙️ **Game Settings**: Customize dice sides (2-20) and max rounds (1-50)
|
| 141 |
+
- 🔄 **Reset & Replay**: Easy game reset and replay functionality
|
| 142 |
+
|
| 143 |
+
## 📁 Project Structure
|
| 144 |
+
|
| 145 |
+
```
|
| 146 |
+
bayesian_game/
|
| 147 |
+
├── domains/ # Core domain logic
|
| 148 |
+
│ ├── environment/ # Evidence generation
|
| 149 |
+
│ │ └── environment_domain.py
|
| 150 |
+
│ ├── belief/ # Bayesian inference
|
| 151 |
+
│ │ └── belief_domain.py
|
| 152 |
+
│ └── coordination/ # Game orchestration
|
| 153 |
+
│ └── game_coordination.py
|
| 154 |
+
├── ui/ # User interface
|
| 155 |
+
│ └── gradio_interface.py
|
| 156 |
+
├── tests/ # Comprehensive test suite
|
| 157 |
+
│ ├── test_environment_domain.py
|
| 158 |
+
│ ├── test_belief_domain.py
|
| 159 |
+
│   ├── test_game_coordination.py
│   ├── test_architectural_constraints.py
│   └── test_ui_interface.py
|
| 160 |
+
├── app.py # Main entry point
|
| 161 |
+
├── requirements.txt # Dependencies
|
| 162 |
+
├── CLAUDE.md # Project specifications
|
| 163 |
+
└── README.md # This file
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
## 🔬 Key Features
|
| 167 |
+
|
| 168 |
+
### Bayesian Inference Engine
|
| 169 |
+
- **Proper Bayesian Updates**: Uses Bayes' rule for belief updates
|
| 170 |
+
- **Entropy Calculation**: Measures uncertainty in beliefs
|
| 171 |
+
- **Evidence Integration**: Combines multiple pieces of evidence
|
| 172 |
+
- **Impossible Evidence Handling**: Gracefully handles contradictory evidence
|
| 173 |
+
|
| 174 |
+
### Reproducible Experiments
|
| 175 |
+
- **Seeded Randomness**: Reproducible results for testing
|
| 176 |
+
- **Deterministic Behavior**: Same seed produces same game sequence
|
| 177 |
+
- **Statistical Analysis**: Track accuracy and convergence
|
| 178 |
+
|
| 179 |
+
### Clean Architecture
|
| 180 |
+
- **Domain Separation**: Pure domains with no cross-dependencies
|
| 181 |
+
- **Testable Components**: Each domain independently testable
|
| 182 |
+
- **Extensible Design**: Easy to add new features or modify rules
|
| 183 |
+
|
| 184 |
+
## 🎓 Educational Value
|
| 185 |
+
|
| 186 |
+
This implementation demonstrates:
|
| 187 |
+
|
| 188 |
+
- **Bayesian Inference**: Real-world application of Bayes' rule
|
| 189 |
+
- **Uncertainty Quantification**: How beliefs evolve with evidence
|
| 190 |
+
- **Information Theory**: Entropy as a measure of uncertainty
|
| 191 |
+
- **Domain-Driven Design**: Clean software architecture patterns
|
| 192 |
+
- **Test-Driven Development**: Comprehensive testing strategies
|
| 193 |
+
|
| 194 |
+
## 🛠️ Development
|
| 195 |
+
|
| 196 |
+
### Key Dependencies
|
| 197 |
+
- `gradio`: Web interface framework
|
| 198 |
+
- `numpy`: Numerical computations for Bayesian inference
|
| 199 |
+
- `matplotlib`: Belief distribution visualization
|
| 200 |
+
- `pytest`: Testing framework
|
| 201 |
+
|
| 202 |
+
### Design Principles
|
| 203 |
+
1. **Pure Functions**: Domains contain pure, testable functions
|
| 204 |
+
2. **Immutable Data**: Evidence and belief updates are immutable
|
| 205 |
+
3. **Clear Interfaces**: Well-defined boundaries between domains
|
| 206 |
+
4. **Comprehensive Testing**: Every component thoroughly tested
|
| 207 |
+
|
| 208 |
+
### Contributing
|
| 209 |
+
1. Follow the existing domain-driven architecture
|
| 210 |
+
2. Add tests for any new functionality
|
| 211 |
+
3. Maintain clean separation between domains
|
| 212 |
+
4. Update documentation for new features
|
| 213 |
+
|
| 214 |
+
## 📊 Example Game Flow
|
| 215 |
+
|
| 216 |
+
```
|
| 217 |
+
Round 1: Evidence "higher" (dice roll > target)
|
| 218 |
+
├─ P(roll>1)=5/6, P(roll>2)=4/6, ..., P(roll>6)=0/6
|
| 219 |
+
├─ Lower targets become more likely
|
| 220 |
+
└─ Entropy: 2.15 bits
|
| 221 |
+
|
| 222 |
+
Round 2: Evidence "lower" (dice roll < target)
|
| 223 |
+
├─ P(roll<1)=0/6, P(roll<2)=1/6, ..., P(roll<6)=5/6
|
| 224 |
+
├─ Higher targets become more likely
|
| 225 |
+
└─ Entropy: 1.97 bits
|
| 226 |
+
|
| 227 |
+
Round 3: Evidence "same" (dice roll = target)
|
| 228 |
+
├─ P(roll=target) = 1/6 for all targets
|
| 229 |
+
├─ Beliefs remain proportional to previous round
|
| 230 |
+
└─ Entropy: 1.97 bits (unchanged)
|
| 231 |
+
```
|
| 232 |
+
|
| 233 |
+
## 🚀 Deployment
|
| 234 |
+
|
| 235 |
+
Ready for deployment on:
|
| 236 |
+
- **Hugging Face Spaces**: Direct deployment support
|
| 237 |
+
- **Local Server**: Built-in Gradio server
|
| 238 |
+
- **Cloud Platforms**: Standard Python web app deployment
|
| 239 |
+
|
| 240 |
+
---
|
| 241 |
+
|
| 242 |
+
**Built with ❤️ using Domain-Driven Design and Bayesian Inference**
|
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Bayesian Game - Hugging Face entry point
|
| 3 |
+
|
| 4 |
+
A Bayesian Game implementation featuring a Belief-based Agent using domain-driven design.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from ui.gradio_interface import create_interface
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def main():
    """Build the game UI and start serving it.

    The object returned by ``create_interface()`` is launched on all
    network interfaces at port 7860 with public sharing disabled and
    error display enabled.
    """
    # Launch with Hugging Face compatible settings
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,  # Set to True for public sharing if needed
        "show_error": True,
    }
    create_interface().launch(**launch_options)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Domains package initialization
|
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Belief domain package initialization
|
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
from typing import List, Literal
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@dataclass(frozen=True)
class BeliefUpdate:
    """Evidence handed to the belief domain for one Bayesian update.

    Carries only the comparison outcome; no dice roll value is included.
    Instances are frozen so a recorded piece of evidence cannot be altered
    after it has been created.
    """

    # Outcome of comparing the (unseen) dice roll against the target.
    comparison_result: Literal["higher", "lower", "same"]
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class BayesianBeliefState:
|
| 13 |
+
"""Bayesian belief state for inferring target die value.
|
| 14 |
+
|
| 15 |
+
Handles pure Bayesian inference without knowledge of actual values.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
def __init__(self, dice_sides: int = 6):
|
| 19 |
+
"""Initialize belief state with uniform prior.
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
dice_sides: Number of sides on the dice
|
| 23 |
+
"""
|
| 24 |
+
self.dice_sides = dice_sides
|
| 25 |
+
# Uniform prior over all possible target values
|
| 26 |
+
self.beliefs = np.ones(dice_sides) / dice_sides
|
| 27 |
+
self.evidence_history: List[BeliefUpdate] = []
|
| 28 |
+
|
| 29 |
+
def get_current_beliefs(self) -> np.ndarray:
|
| 30 |
+
"""Get current belief distribution over target values.
|
| 31 |
+
|
| 32 |
+
Returns:
|
| 33 |
+
Array of probabilities for each possible target value (1 to dice_sides)
|
| 34 |
+
"""
|
| 35 |
+
return self.beliefs.copy()
|
| 36 |
+
|
| 37 |
+
def get_most_likely_target(self) -> int:
|
| 38 |
+
"""Get the most likely target value based on current beliefs.
|
| 39 |
+
|
| 40 |
+
Returns:
|
| 41 |
+
Most likely target value (1-indexed)
|
| 42 |
+
"""
|
| 43 |
+
return np.argmax(self.beliefs) + 1
|
| 44 |
+
|
| 45 |
+
def get_belief_for_target(self, target: int) -> float:
|
| 46 |
+
"""Get belief probability for a specific target value.
|
| 47 |
+
|
| 48 |
+
Args:
|
| 49 |
+
target: Target value (1 to dice_sides)
|
| 50 |
+
|
| 51 |
+
Returns:
|
| 52 |
+
Probability that target is the true value
|
| 53 |
+
"""
|
| 54 |
+
if not (1 <= target <= self.dice_sides):
|
| 55 |
+
raise ValueError(f"Target must be between 1 and {self.dice_sides}")
|
| 56 |
+
return self.beliefs[target - 1]
|
| 57 |
+
|
| 58 |
+
def update_beliefs(self, evidence: BeliefUpdate) -> None:
|
| 59 |
+
"""Update beliefs based on new evidence using Bayes' rule.
|
| 60 |
+
|
| 61 |
+
Args:
|
| 62 |
+
evidence: New evidence to incorporate
|
| 63 |
+
"""
|
| 64 |
+
self.evidence_history.append(evidence)
|
| 65 |
+
|
| 66 |
+
comparison_result = evidence.comparison_result
|
| 67 |
+
|
| 68 |
+
# Calculate likelihood for each possible target value
|
| 69 |
+
likelihoods = np.zeros(self.dice_sides)
|
| 70 |
+
|
| 71 |
+
for target_idx in range(self.dice_sides):
|
| 72 |
+
target_value = target_idx + 1
|
| 73 |
+
|
| 74 |
+
# Calculate P(comparison_result | target_value)
|
| 75 |
+
# This is the probability that ANY dice roll would produce this comparison result
|
| 76 |
+
if comparison_result == "higher":
|
| 77 |
+
# P(roll > target) = (dice_sides - target) / dice_sides
|
| 78 |
+
likelihood = (self.dice_sides - target_value) / self.dice_sides
|
| 79 |
+
elif comparison_result == "lower":
|
| 80 |
+
# P(roll < target) = (target - 1) / dice_sides
|
| 81 |
+
likelihood = (target_value - 1) / self.dice_sides
|
| 82 |
+
else: # comparison_result == "same"
|
| 83 |
+
# P(roll = target) = 1 / dice_sides
|
| 84 |
+
likelihood = 1 / self.dice_sides
|
| 85 |
+
|
| 86 |
+
likelihoods[target_idx] = likelihood
|
| 87 |
+
|
| 88 |
+
# Apply Bayes' rule: posterior ∝ prior × likelihood
|
| 89 |
+
self.beliefs = self.beliefs * likelihoods
|
| 90 |
+
|
| 91 |
+
# Normalize to ensure probabilities sum to 1
|
| 92 |
+
total_belief = np.sum(self.beliefs)
|
| 93 |
+
if total_belief > 0:
|
| 94 |
+
self.beliefs = self.beliefs / total_belief
|
| 95 |
+
else:
|
| 96 |
+
# If all likelihoods are 0 (shouldn't happen with valid evidence),
|
| 97 |
+
# reset to uniform distribution
|
| 98 |
+
self.beliefs = np.ones(self.dice_sides) / self.dice_sides
|
| 99 |
+
|
| 100 |
+
def reset_beliefs(self) -> None:
    """Restore the uniform prior and discard all recorded evidence."""
    sides = self.dice_sides
    self.evidence_history = []
    # Every one of the `sides` targets gets probability 1 / sides.
    self.beliefs = np.ones(sides) / sides
|
| 104 |
+
|
| 105 |
+
def get_entropy(self) -> float:
    """Calculate the Shannon entropy of the current belief distribution.

    Returns:
        Entropy in bits (base-2 logarithm) as a built-in ``float``;
        higher means more uncertain. A distribution with a single
        certain outcome, or with no positive entries, yields ``0.0``.
    """
    # Zero-probability entries contribute nothing and would hit log2(0).
    positive = self.beliefs[self.beliefs > 0]
    if positive.size == 0:
        return 0.0

    entropy = -np.sum(positive * np.log2(positive))

    # Negating a zero sum produces -0.0, which would be displayed as
    # "-0.00"; adding 0.0 normalizes the sign. float() converts the NumPy
    # scalar so the declared return type holds exactly.
    return float(entropy) + 0.0
|
| 116 |
+
|
| 117 |
+
def get_evidence_count(self) -> int:
    """Report how many evidence updates have been recorded.

    Returns:
        The length of ``self.evidence_history``.
    """
    recorded = self.evidence_history
    return len(recorded)
|
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Coordination domain package initialization
|
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
from typing import List, Dict, Any
|
| 3 |
+
from enum import Enum
|
| 4 |
+
|
| 5 |
+
from ..environment.environment_domain import Environment, EnvironmentEvidence
|
| 6 |
+
from ..belief.belief_domain import BayesianBeliefState, BeliefUpdate
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class GamePhase(Enum):
    """Lifecycle stages of a Bayesian Game."""

    # A game object exists but no game has been started yet.
    SETUP = "setup"
    # A game is in progress and rounds may be played.
    PLAYING = "playing"
    # The configured number of rounds has been played.
    FINISHED = "finished"
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@dataclass
class GameState:
    """Snapshot of a Bayesian Game: progress, phase, target and beliefs."""

    round_number: int
    max_rounds: int
    phase: GamePhase
    target_value: int = None
    evidence_history: List[EnvironmentEvidence] = None
    current_beliefs: List[float] = None
    most_likely_target: int = None
    belief_entropy: float = None

    def __post_init__(self):
        # None stands for "not supplied"; each instance then gets its own
        # fresh list rather than sharing a mutable default.
        for list_field in ("evidence_history", "current_beliefs"):
            if getattr(self, list_field) is None:
                setattr(self, list_field, [])
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class BayesianGame:
    """Main orchestration class for the Bayesian Game.

    Coordinates between the Environment and Belief domains while keeping
    them separated: the environment produces complete evidence (dice roll
    plus comparison result), but only the comparison result is forwarded
    to the belief state.
    """

    def __init__(self, dice_sides: int = 6, max_rounds: int = 10, seed: int = None):
        """Initialize the Bayesian Game.

        Args:
            dice_sides: Number of sides on the dice
            max_rounds: Maximum number of rounds to play
            seed: Random seed for reproducible results; forwarded to the
                environment
        """
        self.dice_sides = dice_sides
        self.max_rounds = max_rounds

        # Initialize domains
        self.environment = Environment(dice_sides=dice_sides, seed=seed)
        self.belief_state = BayesianBeliefState(dice_sides=dice_sides)

        # No target exists until start_new_game() is called.
        self.game_state = GameState(
            round_number=0,
            max_rounds=max_rounds,
            phase=GamePhase.SETUP
        )

    def start_new_game(self, target_value: int = None) -> GameState:
        """Start a new game with optional specific target value.

        Args:
            target_value: Specific target value, or None for random

        Returns:
            Initial game state

        Raises:
            ValueError: If ``target_value`` is rejected by the environment
                (outside 1..dice_sides). Beliefs and game state of any game
                in progress are left untouched in that case.
        """
        # Choose the target FIRST. set_target_value() validates its input,
        # and failing here, before anything is reset, keeps the belief
        # state and the game state consistent with each other.
        if target_value is not None:
            self.environment.set_target_value(target_value)
        else:
            self.environment.generate_random_target()

        # Reset the belief domain to its uniform prior.
        self.belief_state.reset_beliefs()

        # Reset game state
        self.game_state = GameState(
            round_number=0,
            max_rounds=self.max_rounds,
            phase=GamePhase.PLAYING,
            target_value=self.environment.get_target_value(),
            evidence_history=[],
            current_beliefs=self.belief_state.get_current_beliefs().tolist(),
            most_likely_target=self.belief_state.get_most_likely_target(),
            belief_entropy=self.belief_state.get_entropy()
        )

        return self.game_state

    def play_round(self) -> GameState:
        """Play one round of the game.

        Rolls the dice in the environment, forwards only the comparison
        result to the belief state, and refreshes the stored game state
        in place.

        Returns:
            Updated game state after the round (the same object that
            get_current_state() returns)

        Raises:
            ValueError: If game is not in playing phase, or if the round
                limit has already been reached
        """
        if self.game_state.phase != GamePhase.PLAYING:
            raise ValueError("Game is not in playing phase")

        if self.game_state.round_number >= self.max_rounds:
            raise ValueError("Game has already finished")

        # Generate evidence from environment
        evidence = self.environment.roll_dice_and_compare()

        # Update belief state (only pass comparison result, not dice roll)
        belief_update = BeliefUpdate(
            comparison_result=evidence.comparison_result
        )
        self.belief_state.update_beliefs(belief_update)

        # Update game state; the full evidence (including the dice roll)
        # is kept here, on the coordination side only.
        self.game_state.round_number += 1
        self.game_state.evidence_history.append(evidence)
        self.game_state.current_beliefs = self.belief_state.get_current_beliefs().tolist()
        self.game_state.most_likely_target = self.belief_state.get_most_likely_target()
        self.game_state.belief_entropy = self.belief_state.get_entropy()

        # Check if game is finished
        if self.game_state.round_number >= self.max_rounds:
            self.game_state.phase = GamePhase.FINISHED

        return self.game_state

    def get_current_state(self) -> GameState:
        """Get current game state.

        Returns:
            Current game state (the live object, not a copy)
        """
        return self.game_state

    def is_game_finished(self) -> bool:
        """Check if game is finished.

        Returns:
            True if game is finished
        """
        return self.game_state.phase == GamePhase.FINISHED

    def get_final_guess_accuracy(self) -> float:
        """Get accuracy of final guess (belief for true target).

        Returns:
            Probability assigned to true target value

        Raises:
            ValueError: If target value is not set
        """
        if self.game_state.target_value is None:
            raise ValueError("Target value not set")

        return self.belief_state.get_belief_for_target(self.game_state.target_value)

    def was_final_guess_correct(self) -> bool:
        """Check if the most likely target matches the true target.

        Returns:
            True if most likely target equals true target

        Raises:
            ValueError: If target value is not set
        """
        if self.game_state.target_value is None:
            raise ValueError("Target value not set")

        return bool(self.game_state.most_likely_target == self.game_state.target_value)

    def get_game_summary(self) -> Dict[str, Any]:
        """Get summary of completed game.

        Returns:
            Dictionary with game summary statistics; ``final_beliefs`` maps
            each target value (starting at 1) to its current probability

        Raises:
            ValueError: If target value is not set (no game was started)
        """
        return {
            "rounds_played": self.game_state.round_number,
            "max_rounds": self.max_rounds,
            "true_target": self.game_state.target_value,
            "final_guess": self.game_state.most_likely_target,
            "guess_correct": self.was_final_guess_correct(),
            "final_accuracy": self.get_final_guess_accuracy(),
            "final_entropy": self.game_state.belief_entropy,
            "evidence_count": len(self.game_state.evidence_history),
            "final_beliefs": dict(enumerate(self.game_state.current_beliefs, 1))
        }
|
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Environment domain package initialization
|
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
from typing import Literal
|
| 3 |
+
import random
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@dataclass
class EnvironmentEvidence:
    """A single observation produced by the environment.

    Attributes:
        dice_roll: The value that was rolled.
        comparison_result: How the roll relates to the target value:
            "higher", "lower" or "same".
    """

    dice_roll: int
    comparison_result: Literal["higher", "lower", "same"]
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class Environment:
    """Source of the hidden target value and of observable evidence.

    Performs no probabilistic reasoning; it only rolls dice and reports
    how each roll compares to the target.
    """

    def __init__(self, dice_sides: int = 6, seed: int = None):
        """Set up the dice and the private random generator.

        Args:
            dice_sides: Number of sides on the dice (default 6)
            seed: Random seed for reproducible results
        """
        self.dice_sides = dice_sides
        # random.Random(None) seeds itself from system entropy, so a single
        # call covers both the seeded and the unseeded case.
        self._random_state = random.Random(seed)
        self._target_value = None

    def set_target_value(self, target: int) -> None:
        """Store the target die value that Player 2 must guess.

        Args:
            target: Target value (1 to dice_sides)

        Raises:
            ValueError: If target lies outside 1..dice_sides
        """
        in_range = 1 <= target <= self.dice_sides
        if not in_range:
            raise ValueError(f"Target must be between 1 and {self.dice_sides}")
        self._target_value = target

    def get_target_value(self) -> int:
        """Return the current target value.

        Returns:
            Current target value

        Raises:
            ValueError: If target value hasn't been set
        """
        current = self._target_value
        if current is None:
            raise ValueError("Target value not set")
        return current

    def generate_random_target(self) -> int:
        """Draw a random target in 1..dice_sides and store it.

        Returns:
            The generated target value
        """
        drawn = self._random_state.randint(1, self.dice_sides)
        self.set_target_value(drawn)
        return drawn

    def roll_dice_and_compare(self) -> EnvironmentEvidence:
        """Roll the dice once and classify the roll against the target.

        Returns:
            EnvironmentEvidence with dice roll and comparison result

        Raises:
            ValueError: If target value hasn't been set
        """
        target = self._target_value
        if target is None:
            raise ValueError("Target value not set")

        roll = self._random_state.randint(1, self.dice_sides)

        # "higher"/"lower" describe the roll relative to the target.
        outcome = (
            "higher" if roll > target
            else "lower" if roll < target
            else "same"
        )

        return EnvironmentEvidence(dice_roll=roll, comparison_result=outcome)
|
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
numpy>=1.21.0
|
| 3 |
+
matplotlib>=3.5.0
|
| 4 |
+
pytest>=7.0.0
|
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Test package initialization
|
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Architectural constraint tests to ensure proper domain separation.
|
| 3 |
+
|
| 4 |
+
These tests verify that the key architectural principles are maintained:
|
| 5 |
+
1. Belief domain receives only comparison results, not dice roll values
|
| 6 |
+
2. Information flows correctly through the coordination layer
|
| 7 |
+
3. Domain boundaries are properly enforced
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import pytest
|
| 11 |
+
import inspect
|
| 12 |
+
from domains.belief.belief_domain import BeliefUpdate, BayesianBeliefState
|
| 13 |
+
from domains.environment.environment_domain import EnvironmentEvidence
|
| 14 |
+
from domains.coordination.game_coordination import BayesianGame
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class TestArchitecturalConstraints:
    """Test architectural constraints and domain separation."""

    def test_belief_update_dataclass_structure(self):
        """Test that BeliefUpdate contains only comparison_result field."""
        fields = BeliefUpdate.__dataclass_fields__

        # Should only contain comparison_result
        assert len(fields) == 1, f"BeliefUpdate should have exactly 1 field, got {len(fields)}: {list(fields.keys())}"
        assert "comparison_result" in fields, "BeliefUpdate must contain comparison_result field"
        assert "dice_roll" not in fields, "BeliefUpdate MUST NOT contain dice_roll field"

    def test_environment_evidence_dataclass_structure(self):
        """Test that EnvironmentEvidence contains both dice_roll and comparison_result."""
        fields = EnvironmentEvidence.__dataclass_fields__

        # Should contain both fields
        assert len(fields) == 2, f"EnvironmentEvidence should have exactly 2 fields, got {len(fields)}: {list(fields.keys())}"
        assert "dice_roll" in fields, "EnvironmentEvidence must contain dice_roll field"
        assert "comparison_result" in fields, "EnvironmentEvidence must contain comparison_result field"

    def test_belief_state_methods_no_dice_roll_parameters(self):
        """Test that BayesianBeliefState methods don't accept dice_roll parameters."""
        methods = inspect.getmembers(BayesianBeliefState, predicate=inspect.isfunction)

        for method_name, method in methods:
            if method_name.startswith('_'):
                continue  # Skip private methods

            param_names = list(inspect.signature(method).parameters.keys())

            assert "dice_roll" not in param_names, f"Method {method_name} MUST NOT have dice_roll parameter"

    def test_belief_update_creation_without_dice_roll(self):
        """Test that BeliefUpdate can be created without dice_roll."""
        # This should work (only comparison_result)
        update = BeliefUpdate(comparison_result="higher")
        assert update.comparison_result == "higher"

        # dice_roll is not a field, so the dataclass-generated __init__
        # must reject it as an unexpected keyword argument.
        with pytest.raises(TypeError):
            BeliefUpdate(dice_roll=3, comparison_result="higher")

    def test_information_filtering_in_coordination(self):
        """Test that game coordination properly filters information to belief domain."""
        game = BayesianGame(seed=42)
        game.start_new_game(target_value=3)

        # Get initial belief state
        initial_beliefs = game.belief_state.get_current_beliefs()

        # Play a round (this should trigger proper information filtering)
        # NOTE(review): a "same" result would leave the uniform beliefs
        # unchanged; the fixed seed is presumably chosen so that the first
        # roll differs from the target -- confirm if the seed ever changes.
        game.play_round()

        # Verify that belief state received update (beliefs changed)
        updated_beliefs = game.belief_state.get_current_beliefs()
        assert any(a != b for a, b in zip(initial_beliefs, updated_beliefs)), \
            "Beliefs should change after receiving evidence"

        # Verify that evidence history in belief domain contains only comparison results
        for evidence in game.belief_state.evidence_history:
            assert hasattr(evidence, "comparison_result"), "Belief evidence must have comparison_result"
            assert not hasattr(evidence, "dice_roll"), "Belief evidence MUST NOT have dice_roll"

    def test_domain_import_isolation(self):
        """Test that belief domain doesn't import environment domain."""
        import re

        import domains.belief.belief_domain as belief_module

        belief_source = inspect.getsource(belief_module)

        # Absolute imports of the environment domain
        assert "from domains.environment" not in belief_source, \
            "Belief domain MUST NOT import environment domain"
        assert "import domains.environment" not in belief_source, \
            "Belief domain MUST NOT import environment domain"

        # The substring checks above cannot see relative imports such as
        # ``from ..environment.environment_domain import X`` or
        # ``from .. import environment``; reject those as well.
        relative_import = re.search(
            r"^\s*from\s+\.+\s*(environment\b|import\s+.*\benvironment\b)",
            belief_source,
            flags=re.MULTILINE,
        )
        assert relative_import is None, \
            "Belief domain MUST NOT import environment domain"

    def test_proper_bayesian_calculation_structure(self):
        """Test that belief updates use probabilistic calculations."""
        belief_state = BayesianBeliefState(dice_sides=6)

        # Apply "higher" evidence
        belief_state.update_beliefs(BeliefUpdate(comparison_result="higher"))

        # Verify that probabilities follow expected pattern for "higher"
        # Target 1: P(roll > 1) = 5/6, should be highest
        # Target 6: P(roll > 6) = 0/6, should be zero
        prob_1 = belief_state.get_belief_for_target(1)
        prob_6 = belief_state.get_belief_for_target(6)

        assert prob_1 > prob_6, "Higher evidence should favor lower targets"
        assert abs(prob_6 - 0.0) < 1e-10, "Target 6 should have zero probability after 'higher' evidence"

    def test_coordination_layer_responsibility(self):
        """Test that coordination layer properly orchestrates without leaking information."""
        game = BayesianGame(seed=42)
        game.start_new_game(target_value=4)

        # Play a round to generate evidence
        state = game.play_round()

        # Game state should have full information (for display)
        assert hasattr(state.evidence_history[0], "dice_roll"), \
            "Game state should maintain full evidence for display"
        assert hasattr(state.evidence_history[0], "comparison_result"), \
            "Game state should maintain comparison results"

        # But belief state should only have comparison results
        belief_evidence = game.belief_state.evidence_history[0]
        assert hasattr(belief_evidence, "comparison_result"), \
            "Belief evidence must have comparison_result"
        assert not hasattr(belief_evidence, "dice_roll"), \
            "Belief evidence MUST NOT have dice_roll"

    def test_no_hard_coded_probabilities(self):
        """Test that belief calculations are dynamic, not hard-coded."""
        # Test with different dice sides to ensure calculations are dynamic
        for dice_sides in [4, 6, 8, 10]:
            belief_state = BayesianBeliefState(dice_sides=dice_sides)

            # Apply "higher" evidence
            belief_state.update_beliefs(BeliefUpdate(comparison_result="higher"))

            prob_1 = belief_state.get_belief_for_target(1)
            prob_last = belief_state.get_belief_for_target(dice_sides)

            # Starting from a uniform prior, the posterior is the likelihood
            # P(roll > t) = (dice_sides - t) / dice_sides, renormalized over
            # all targets.
            unnormalized = [(dice_sides - t) / dice_sides for t in range(1, dice_sides + 1)]
            expected_prob_1 = unnormalized[0] / sum(unnormalized)

            assert prob_1 > prob_last, f"Target 1 should be more likely than target {dice_sides}"
            assert abs(prob_last - 0.0) < 1e-10, f"Target {dice_sides} should have zero probability"
            assert prob_1 > 0, "Target 1 should have non-zero probability"
            assert abs(prob_1 - expected_prob_1) < 1e-10, \
                f"Target 1 should have probability {expected_prob_1} for {dice_sides} sides"
|
|
@@ -0,0 +1,295 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
import numpy as np
|
| 3 |
+
from domains.belief.belief_domain import BayesianBeliefState, BeliefUpdate
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class TestBeliefUpdate:
    """Checks for the BeliefUpdate dataclass."""

    def test_belief_update_creation(self):
        """A BeliefUpdate stores the comparison result it was built with."""
        created = BeliefUpdate(comparison_result="higher")
        assert created.comparison_result == "higher"

    def test_belief_update_all_results(self):
        """Each of the three comparison results is stored unchanged."""
        for outcome in ("higher", "lower", "same"):
            created = BeliefUpdate(comparison_result=outcome)
            assert created.comparison_result == outcome
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class TestBayesianBeliefState:
|
| 23 |
+
"""Test the BayesianBeliefState class."""
|
| 24 |
+
|
| 25 |
+
def test_initialization_default(self):
    """Default construction gives six sides, a uniform prior, no evidence."""
    state = BayesianBeliefState()

    assert state.dice_sides == 6
    assert len(state.beliefs) == 6
    # Uniform prior: every target starts at 1/6.
    assert np.allclose(state.beliefs, 1/6)
    assert len(state.evidence_history) == 0
|
| 33 |
+
|
| 34 |
+
def test_initialization_custom(self):
    """A custom side count sizes the belief vector and its uniform prior."""
    state = BayesianBeliefState(dice_sides=8)

    assert state.dice_sides == 8
    assert len(state.beliefs) == 8
    # Uniform prior: every target starts at 1/8.
    assert np.allclose(state.beliefs, 1/8)
|
| 41 |
+
|
| 42 |
+
def test_get_current_beliefs(self):
    """get_current_beliefs hands out a copy, not the internal array."""
    state = BayesianBeliefState(dice_sides=6)
    snapshot = state.get_current_beliefs()

    # Mutating the returned array must not reach the internal beliefs.
    snapshot[0] = 0.5
    assert not np.array_equal(snapshot, state.beliefs)
    assert np.allclose(state.beliefs, 1/6)
|
| 51 |
+
|
| 52 |
+
def test_get_most_likely_target_uniform(self):
    """With a uniform prior the first target (1) is reported."""
    state = BayesianBeliefState(dice_sides=6)

    # All targets tie, so the first one (index 0 + 1) is expected.
    assert state.get_most_likely_target() == 1
|
| 59 |
+
|
| 60 |
+
def test_get_most_likely_target_after_update(self):
    """Test getting most likely target after belief update."""
    belief_state = BayesianBeliefState(dice_sides=6)

    # Update with evidence that favors lower target values
    update = BeliefUpdate(comparison_result="higher")
    belief_state.update_beliefs(update)

    most_likely = belief_state.get_most_likely_target()
    assert most_likely in range(1, 7)  # Should be valid

    # P(roll > target) = (6 - target) / 6 is strictly largest for target 1,
    # so starting from a uniform prior target 1 must be the single most
    # likely value after one "higher" observation.
    assert most_likely == 1
|
| 71 |
+
|
| 72 |
+
def test_get_belief_for_target_valid(self):
    """Every valid target starts with the uniform probability 1/6."""
    state = BayesianBeliefState(dice_sides=6)

    for candidate in range(1, 7):
        probability = state.get_belief_for_target(candidate)
        # No evidence yet, so the prior is still uniform.
        assert abs(probability - 1/6) < 1e-10
|
| 79 |
+
|
| 80 |
+
def test_get_belief_for_target_invalid(self):
    """Targets outside 1..6 are rejected with a ValueError."""
    state = BayesianBeliefState(dice_sides=6)

    for out_of_range in (0, 7, -1, 10):
        with pytest.raises(ValueError, match="Target must be between 1 and 6"):
            state.get_belief_for_target(out_of_range)
|
| 88 |
+
|
| 89 |
+
def test_update_beliefs_higher(self):
    """A 'higher' observation favors low targets and rules out target 6."""
    state = BayesianBeliefState(dice_sides=6)

    # "higher" means the roll exceeded the target, which is more likely
    # the lower the target is.
    state.update_beliefs(BeliefUpdate(comparison_result="higher"))

    # Target 1: P(roll > 1) = 5/6
    # Target 6: P(roll > 6) = 0/6
    lowest = state.get_belief_for_target(1)
    highest = state.get_belief_for_target(6)

    assert lowest > highest  # target 1 more likely than target 6
    assert abs(highest - 0.0) < 1e-10  # target 6 is impossible
|
| 106 |
+
|
| 107 |
+
def test_update_beliefs_lower(self):
    """A 'lower' observation favors high targets and rules out target 1."""
    state = BayesianBeliefState(dice_sides=6)

    # "lower" means the roll fell below the target, which is more likely
    # the higher the target is.
    state.update_beliefs(BeliefUpdate(comparison_result="lower"))

    # Target 1: P(roll < 1) = 0/6
    # Target 6: P(roll < 6) = 5/6
    lowest = state.get_belief_for_target(1)
    highest = state.get_belief_for_target(6)

    assert highest > lowest  # target 6 more likely than target 1
    assert abs(lowest - 0.0) < 1e-10  # target 1 is impossible
|
| 124 |
+
|
| 125 |
+
def test_update_beliefs_same(self):
    """A 'same' observation leaves a uniform prior unchanged."""
    state = BayesianBeliefState(dice_sides=6)

    # P(roll = target) = 1/6 for every target, so this evidence does not
    # discriminate between targets.
    state.update_beliefs(BeliefUpdate(comparison_result="same"))

    for candidate in range(1, 7):
        probability = state.get_belief_for_target(candidate)
        assert abs(probability - 1/6) < 1e-10  # still uniform
|
| 138 |
+
|
| 139 |
+
def test_update_beliefs_multiple(self):
    """Opposing observations eliminate both extreme targets."""
    state = BayesianBeliefState(dice_sides=6)

    # "higher" favors low targets, "lower" favors high targets.
    for outcome in ("higher", "lower"):
        state.update_beliefs(BeliefUpdate(comparison_result=outcome))

    # Combined likelihoods:
    # Target 1: P(roll>1) * P(roll<1) = 5/6 * 0 = 0
    # Target 6: P(roll>6) * P(roll<6) = 0 * 5/6 = 0
    # Only the targets in between keep non-zero probability.
    at_one = state.get_belief_for_target(1)
    at_six = state.get_belief_for_target(6)
    at_three = state.get_belief_for_target(3)

    assert abs(at_one - 0.0) < 1e-10  # target 1 eliminated
    assert abs(at_six - 0.0) < 1e-10  # target 6 eliminated
    assert at_three > 0  # a middle target survives
|
| 163 |
+
|
| 164 |
+
def test_update_beliefs_evidence_history(self):
    """Every applied update is recorded, in order, in evidence_history."""
    state = BayesianBeliefState(dice_sides=6)
    applied = [
        BeliefUpdate(comparison_result=outcome)
        for outcome in ("higher", "lower", "same")
    ]

    for item in applied:
        state.update_beliefs(item)

    assert len(state.evidence_history) == 3
    assert state.evidence_history == applied
|
| 179 |
+
|
| 180 |
+
def test_reset_beliefs(self):
    """reset_beliefs() restores the uniform prior and clears the history."""
    state = BayesianBeliefState(dice_sides=6)

    # Move away from the prior first so the reset has something to undo.
    state.update_beliefs(BeliefUpdate(comparison_result="higher"))
    before_low = state.get_belief_for_target(1)
    before_high = state.get_belief_for_target(6)
    assert before_low != before_high  # no longer uniform
    assert len(state.evidence_history) == 1

    state.reset_beliefs()

    # Every target is back at 1/6 and no evidence is remembered.
    for candidate in range(1, 7):
        assert abs(state.get_belief_for_target(candidate) - 1/6) < 1e-10
    assert len(state.evidence_history) == 0
|
| 201 |
+
|
| 202 |
+
def test_get_entropy_uniform(self):
    """A fresh (uniform) belief over six targets has entropy log2(6)."""
    fresh_state = BayesianBeliefState(dice_sides=6)

    # log2(6) is the maximum entropy attainable with six outcomes.
    maximum = np.log2(6)
    assert abs(fresh_state.get_entropy() - maximum) < 1e-10
|
| 209 |
+
|
| 210 |
+
def test_get_entropy_certain(self):
    """Entropy collapses once consistent evidence makes one target dominant.

    Ten consecutive 'higher' results weight target t by P(roll > t)**10,
    i.e. proportionally to (6 - t)**10, so target 1 ends up with roughly
    90% of the mass and the entropy drops to about 0.5 bits.
    """
    belief_state = BayesianBeliefState(dice_sides=6)

    for _ in range(10):
        belief_state.update_beliefs(BeliefUpdate(comparison_result="higher"))

    entropy = belief_state.get_entropy()
    max_entropy = np.log2(6)

    # The belief must actually concentrate on the lowest target ...
    assert belief_state.get_most_likely_target() == 1
    # ... and entropy must be *much* lower than the maximum, not merely
    # lower (a bare `< max_entropy` is already true after one update).
    assert entropy < max_entropy / 2
|
| 223 |
+
|
| 224 |
+
def test_get_entropy_partial(self):
    """One informative update puts entropy strictly between 0 and log2(6)."""
    state = BayesianBeliefState(dice_sides=6)

    # A single 'higher' narrows the belief without making it certain.
    state.update_beliefs(BeliefUpdate(comparison_result="higher"))

    observed = state.get_entropy()
    lower_bound, upper_bound = 0, np.log2(6)
    assert lower_bound < observed < upper_bound
|
| 238 |
+
|
| 239 |
+
def test_get_evidence_count(self):
    """get_evidence_count() starts at zero and grows by one per update."""
    state = BayesianBeliefState(dice_sides=6)
    assert state.get_evidence_count() == 0

    applied = 0
    for outcome in ("higher", "lower"):
        state.update_beliefs(BeliefUpdate(comparison_result=outcome))
        applied += 1
        assert state.get_evidence_count() == applied
|
| 254 |
+
|
| 255 |
+
def test_beliefs_sum_to_one(self):
    """The belief vector stays normalised before and after every update."""
    state = BayesianBeliefState(dice_sides=6)

    def total_mass():
        return np.sum(state.beliefs)

    # The prior itself must already be normalised.
    assert abs(total_mass() - 1.0) < 1e-10

    for outcome in ("higher", "lower", "same", "higher"):
        state.update_beliefs(BeliefUpdate(comparison_result=outcome))
        assert abs(total_mass() - 1.0) < 1e-10
|
| 273 |
+
|
| 274 |
+
def test_impossible_evidence_handling(self):
    """Zero-likelihood targets stay at zero and the rest still normalise."""
    state = BayesianBeliefState(dice_sides=6)

    # Three 'higher' results push the mass toward the low targets.
    for _ in range(3):
        state.update_beliefs(BeliefUpdate(comparison_result="higher"))

    assert state.get_belief_for_target(1) > 0                  # still possible
    assert abs(state.get_belief_for_target(6) - 0.0) < 1e-10   # ruled out

    # Conflicting-direction evidence must not break normalisation.
    state.update_beliefs(BeliefUpdate(comparison_result="lower"))

    total = 0
    for candidate in range(1, 7):
        total += state.get_belief_for_target(candidate)
    assert abs(total - 1.0) < 1e-10
|
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
import random
|
| 3 |
+
from domains.environment.environment_domain import Environment, EnvironmentEvidence
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class TestEnvironmentEvidence:
    """Checks for the EnvironmentEvidence dataclass."""

    def test_evidence_creation(self):
        """Constructor arguments are exposed unchanged as attributes."""
        created = EnvironmentEvidence(dice_roll=3, comparison_result="higher")
        assert (created.dice_roll, created.comparison_result) == (3, "higher")

    def test_evidence_comparison_results(self):
        """Each of the three comparison labels round-trips through the dataclass."""
        for label in ("higher", "lower", "same"):
            created = EnvironmentEvidence(dice_roll=1, comparison_result=label)
            assert created.comparison_result == label
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class TestEnvironment:
|
| 24 |
+
"""Test the Environment class."""
|
| 25 |
+
|
| 26 |
+
def test_environment_initialization(self):
    """Default and custom construction set dice_sides and leave no target."""
    default_env = Environment()
    assert default_env.dice_sides == 6
    assert default_env._target_value is None

    custom_env = Environment(dice_sides=8, seed=42)
    assert custom_env.dice_sides == 8
    assert custom_env._target_value is None
|
| 37 |
+
|
| 38 |
+
def test_set_target_value_valid(self):
    """Every face value 1..6 can be set and read back."""
    env = Environment(dice_sides=6)

    candidate = 1
    while candidate <= 6:
        env.set_target_value(candidate)
        assert env.get_target_value() == candidate
        candidate += 1
|
| 45 |
+
|
| 46 |
+
def test_set_target_value_invalid(self):
    """Targets outside 1..6 are rejected with a ValueError."""
    env = Environment(dice_sides=6)
    expected_message = "Target must be between 1 and 6"

    for out_of_range in (0, 7, -1, 10):
        with pytest.raises(ValueError, match=expected_message):
            env.set_target_value(out_of_range)
|
| 54 |
+
|
| 55 |
+
def test_get_target_value_not_set(self):
    """Reading the target before one was set raises ValueError."""
    unset_env = Environment()

    with pytest.raises(ValueError, match="Target value not set"):
        unset_env.get_target_value()
|
| 61 |
+
|
| 62 |
+
def test_generate_random_target(self):
    """Generated targets are in range and the last one becomes current."""
    env = Environment(dice_sides=6, seed=42)

    drawn = []
    for _ in range(10):
        drawn.append(env.generate_random_target())

    # Every draw must be a legal face of the die.
    assert all(1 <= value <= 6 for value in drawn)

    # The environment must remember the most recent draw.
    assert env.get_target_value() == drawn[-1]
|
| 75 |
+
|
| 76 |
+
def test_generate_random_target_reproducible(self):
    """Two environments built with the same seed draw the same target."""
    first_env, second_env = (
        Environment(dice_sides=6, seed=42),
        Environment(dice_sides=6, seed=42),
    )

    first_target = first_env.generate_random_target()
    second_target = second_env.generate_random_target()

    assert first_target == second_target
|
| 85 |
+
|
| 86 |
+
def test_roll_dice_and_compare_target_not_set(self):
    """Rolling before a target exists raises ValueError."""
    unset_env = Environment()

    with pytest.raises(ValueError, match="Target value not set"):
        unset_env.roll_dice_and_compare()
|
| 92 |
+
|
| 93 |
+
def test_roll_dice_and_compare_higher(self):
    """With target 1, every roll is reported as 'higher' or 'same'.

    A roll can never be below the minimum face, so 'lower' is impossible
    here; the former `dice_roll < 1` branch was unreachable and has been
    dropped together with the never-read `results` list.
    """
    env = Environment(dice_sides=6, seed=42)
    env.set_target_value(1)  # Target = 1, any roll > 1 should be "higher"

    # Roll repeatedly so several different faces are exercised.
    for _ in range(20):
        evidence = env.roll_dice_and_compare()

        assert 1 <= evidence.dice_roll <= 6
        expected = "higher" if evidence.dice_roll > 1 else "same"
        assert evidence.comparison_result == expected
|
| 111 |
+
|
| 112 |
+
def test_roll_dice_and_compare_lower(self):
    """With target 6, every roll is reported as 'lower' or 'same'.

    A roll can never exceed the maximum face, so 'higher' is impossible
    here; the former `dice_roll > 6` branch was unreachable and has been
    removed.
    """
    env = Environment(dice_sides=6, seed=42)
    env.set_target_value(6)  # Target = 6, any roll < 6 should be "lower"

    # Roll repeatedly so several different faces are exercised.
    for _ in range(20):
        evidence = env.roll_dice_and_compare()

        assert 1 <= evidence.dice_roll <= 6
        expected = "lower" if evidence.dice_roll < 6 else "same"
        assert evidence.comparison_result == expected
|
| 128 |
+
|
| 129 |
+
def test_roll_dice_and_compare_same(self):
    """For each target, a matching roll is eventually seen and labelled 'same'."""
    env = Environment(dice_sides=6, seed=42)

    for target in range(1, 7):
        env.set_target_value(target)

        # Bounded retry: keep rolling until the target face shows up,
        # checking the label of every roll along the way.
        for _ in range(100):
            rolled = env.roll_dice_and_compare()

            if rolled.dice_roll == target:
                assert rolled.comparison_result == "same"
                break

            if rolled.dice_roll > target:
                assert rolled.comparison_result == "higher"
            else:
                assert rolled.comparison_result == "lower"
        else:
            # 100 rolls without a match on a six-sided die.
            raise AssertionError(
                f"Failed to roll target value {target} in 100 attempts"
            )
|
| 153 |
+
|
| 154 |
+
def test_roll_dice_and_compare_all_outcomes(self):
    """A mid-range target yields all three labels, each consistent with its roll."""
    env = Environment(dice_sides=6, seed=42)
    env.set_target_value(3)  # a middle value leaves room on both sides

    seen = set()
    for _ in range(100):
        rolled = env.roll_dice_and_compare()
        seen.add(rolled.comparison_result)

        # The label must agree with the roll it accompanies.
        if rolled.dice_roll > 3:
            expected = "higher"
        elif rolled.dice_roll < 3:
            expected = "lower"
        else:
            expected = "same"
        assert rolled.comparison_result == expected

    # 100 rolls are enough to observe every outcome.
    for label in ("higher", "lower", "same"):
        assert label in seen
|
| 178 |
+
|
| 179 |
+
def test_dice_sides_parameter(self):
    """Rolls respect the configured number of sides for several dice sizes."""
    valid_labels = ["higher", "lower", "same"]

    for side_count in (4, 8, 10, 20):
        env = Environment(dice_sides=side_count, seed=42)
        env.set_target_value(side_count // 2)  # middle value

        rolled = env.roll_dice_and_compare()
        assert 1 <= rolled.dice_roll <= side_count
        assert rolled.comparison_result in valid_labels
|
|
@@ -0,0 +1,351 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from domains.coordination.game_coordination import BayesianGame, GameState, GamePhase
|
| 3 |
+
from domains.environment.environment_domain import EnvironmentEvidence
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class TestGameState:
    """Checks for the GameState dataclass."""

    def test_game_state_creation(self):
        """Required fields are stored; optional ones fall back to empty defaults."""
        created = GameState(round_number=5, max_rounds=10, phase=GamePhase.PLAYING)

        assert created.round_number == 5
        assert created.max_rounds == 10
        assert created.phase == GamePhase.PLAYING
        # Optional fields when omitted:
        assert created.target_value is None
        assert created.evidence_history == []
        assert created.current_beliefs == []

    def test_game_state_with_optional_params(self):
        """Explicitly supplied optional fields are stored as given."""
        history = [EnvironmentEvidence(dice_roll=3, comparison_result="higher")]
        belief_vector = [0.2, 0.3, 0.5]
        optional_fields = dict(
            target_value=4,
            evidence_history=history,
            current_beliefs=belief_vector,
            most_likely_target=3,
            belief_entropy=1.5,
        )

        created = GameState(
            round_number=2,
            max_rounds=5,
            phase=GamePhase.PLAYING,
            **optional_fields,
        )

        assert created.target_value == 4
        assert created.evidence_history == history
        assert created.current_beliefs == belief_vector
        assert created.most_likely_target == 3
        assert created.belief_entropy == 1.5
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class TestBayesianGame:
|
| 48 |
+
"""Test the BayesianGame class."""
|
| 49 |
+
|
| 50 |
+
def test_initialization_default(self):
    """A default game uses a 6-sided die, 10 rounds and starts in SETUP."""
    new_game = BayesianGame()

    # Top-level configuration
    assert new_game.dice_sides == 6
    assert new_game.max_rounds == 10

    # Configuration is propagated to both domains
    assert new_game.environment.dice_sides == 6
    assert new_game.belief_state.dice_sides == 6

    # Initial game state
    initial = new_game.game_state
    assert initial.phase == GamePhase.SETUP
    assert initial.round_number == 0
    assert initial.max_rounds == 10
|
| 61 |
+
|
| 62 |
+
def test_initialization_custom(self):
    """Custom dice_sides and max_rounds reach the game and its sub-domains."""
    custom_game = BayesianGame(dice_sides=8, max_rounds=15, seed=42)

    assert custom_game.dice_sides == 8
    assert custom_game.max_rounds == 15
    for domain in (custom_game.environment, custom_game.belief_state):
        assert domain.dice_sides == 8
    assert custom_game.game_state.max_rounds == 15
|
| 71 |
+
|
| 72 |
+
def test_start_new_game_random_target(self):
    """Starting without a target picks a valid one and yields a clean PLAYING state."""
    started = BayesianGame(seed=42).start_new_game()

    assert started.phase == GamePhase.PLAYING
    assert started.round_number == 0
    assert 1 <= started.target_value <= 6
    assert len(started.evidence_history) == 0
    assert len(started.current_beliefs) == 6
    assert started.most_likely_target in range(1, 7)
    assert started.belief_entropy > 0
|
| 85 |
+
|
| 86 |
+
def test_start_new_game_specific_target(self):
    """An explicit target is stored in both the state and the environment."""
    game = BayesianGame()
    chosen_target = 4

    started = game.start_new_game(target_value=chosen_target)

    assert started.phase == GamePhase.PLAYING
    assert started.target_value == chosen_target
    assert game.environment.get_target_value() == chosen_target
|
| 95 |
+
|
| 96 |
+
def test_start_new_game_resets_state(self):
    """Restarting discards rounds and evidence from the previous game."""
    game = BayesianGame(seed=42)

    # First game: accumulate two rounds of evidence.
    game.start_new_game(target_value=3)
    for _ in range(2):
        game.play_round()

    # Second game must start from scratch.
    restarted = game.start_new_game(target_value=5)

    assert restarted.target_value == 5
    assert restarted.round_number == 0
    assert len(restarted.evidence_history) == 0
    assert len(game.belief_state.evidence_history) == 0
|
| 112 |
+
|
| 113 |
+
def test_play_round_not_playing(self):
    """play_round() before start_new_game() raises ValueError."""
    unstarted_game = BayesianGame()  # still in the setup phase

    with pytest.raises(ValueError, match="Game is not in playing phase"):
        unstarted_game.play_round()
|
| 120 |
+
|
| 121 |
+
def test_play_round_game_finished(self):
    """play_round() after the last round raises ValueError."""
    single_round_game = BayesianGame(max_rounds=1, seed=42)
    single_round_game.start_new_game(target_value=3)

    # The only permitted round ends the game ...
    single_round_game.play_round()

    # ... so one more attempt must be refused.
    with pytest.raises(ValueError, match="Game is not in playing phase"):
        single_round_game.play_round()
|
| 132 |
+
|
| 133 |
+
def test_play_round_updates_state(self):
    """One round advances the counter, records evidence and refreshes beliefs."""
    game = BayesianGame(seed=42)
    game.start_new_game(target_value=3)
    round_before = game.get_current_state().round_number

    after = game.play_round()

    assert after.round_number == round_before + 1
    assert len(after.evidence_history) == 1
    assert len(after.current_beliefs) == 6
    assert after.most_likely_target in range(1, 7)
    assert after.belief_entropy >= 0

    # The recorded evidence itself must be well-formed.
    recorded = after.evidence_history[0]
    assert 1 <= recorded.dice_roll <= 6
    assert recorded.comparison_result in ["higher", "lower", "same"]
|
| 153 |
+
|
| 154 |
+
def test_play_multiple_rounds(self):
    """Round counter and evidence grow together; the fifth round finishes the game."""
    total_rounds = 5
    game = BayesianGame(max_rounds=total_rounds, seed=42)
    game.start_new_game(target_value=4)

    for round_index in range(1, 6):
        snapshot = game.play_round()

        assert snapshot.round_number == round_index
        assert len(snapshot.evidence_history) == round_index

        # Only the final round flips the phase to FINISHED.
        expected_phase = (
            GamePhase.PLAYING if round_index < 5 else GamePhase.FINISHED
        )
        assert snapshot.phase == expected_phase
|
| 169 |
+
|
| 170 |
+
def test_get_current_state(self):
    """get_current_state() reflects SETUP first, then PLAYING with the target."""
    game = BayesianGame()

    # Before a game is started
    assert game.get_current_state().phase == GamePhase.SETUP

    # After a game is started
    game.start_new_game(target_value=2)
    current = game.get_current_state()
    assert current.phase == GamePhase.PLAYING
    assert current.target_value == 2
|
| 183 |
+
|
| 184 |
+
def test_is_game_finished(self):
    """is_game_finished() turns True only after the last allowed round."""
    game = BayesianGame(max_rounds=2, seed=42)

    # Fresh game object: not finished.
    assert not game.is_game_finished()

    # Started but no round played: not finished.
    game.start_new_game(target_value=3)
    assert not game.is_game_finished()

    # One of two rounds played: not finished.
    game.play_round()
    assert not game.is_game_finished()

    # Second (final) round played: finished.
    game.play_round()
    assert game.is_game_finished()
|
| 202 |
+
|
| 203 |
+
def test_get_final_guess_accuracy_no_target(self):
    """Asking for accuracy before a target exists raises ValueError."""
    unstarted_game = BayesianGame()

    with pytest.raises(ValueError, match="Target value not set"):
        unstarted_game.get_final_guess_accuracy()
|
| 209 |
+
|
| 210 |
+
def test_get_final_guess_accuracy(self):
    """Accuracy equals the belief currently assigned to the true target."""
    true_target = 3
    game = BayesianGame(seed=42)
    game.start_new_game(target_value=true_target)

    for _ in range(2):
        game.play_round()

    reported = game.get_final_guess_accuracy()

    # It is a probability, and specifically the one held for target 3.
    assert 0 <= reported <= 1
    assert reported == game.belief_state.get_belief_for_target(true_target)
|
| 225 |
+
|
| 226 |
+
def test_was_final_guess_correct_no_target(self):
    """Checking correctness before a target exists raises ValueError."""
    unstarted_game = BayesianGame()

    with pytest.raises(ValueError, match="Target value not set"):
        unstarted_game.was_final_guess_correct()
|
| 232 |
+
|
| 233 |
+
def test_was_final_guess_correct(self):
    """was_final_guess_correct() is a bool matching most_likely_target == target."""
    game = BayesianGame(seed=42)
    game.start_new_game(target_value=3)

    # Play at most ten rounds, stopping early if the game ends.
    remaining = 10
    while remaining > 0 and not game.is_game_finished():
        game.play_round()
        remaining -= 1

    verdict = game.was_final_guess_correct()
    best_guess = game.game_state.most_likely_target

    assert isinstance(verdict, bool)
    assert verdict == (best_guess == 3)
|
| 249 |
+
|
| 250 |
+
def test_get_game_summary(self):
    """The summary of a completed 3-round game contains every expected field."""
    game = BayesianGame(max_rounds=3, seed=42)
    game.start_new_game(target_value=4)

    # Run the game to completion.
    while True:
        if game.is_game_finished():
            break
        game.play_round()

    report = game.get_game_summary()

    # Exact-valued fields
    assert report["rounds_played"] == 3
    assert report["max_rounds"] == 3
    assert report["true_target"] == 4
    assert report["evidence_count"] == 3

    # Range / type constrained fields
    assert report["final_guess"] in range(1, 7)
    assert isinstance(report["guess_correct"], bool)
    assert 0 <= report["final_accuracy"] <= 1
    assert report["final_entropy"] >= 0

    # Final beliefs are keyed by target value 1..6.
    final_beliefs = report["final_beliefs"]
    assert len(final_beliefs) == 6
    for face in range(1, 7):
        assert face in final_beliefs
|
| 275 |
+
|
| 276 |
+
def test_belief_updates_with_evidence(self):
    """Beliefs move away from uniform and stay consistent with the evidence.

    The target is fixed at 1, so a roll can only be 'higher' or 'same'.
    The previous version ended in a nested loop whose body was `pass`
    and kept an unused `initial_beliefs`; both are replaced by real
    assertions on the observed evidence.
    """
    game = BayesianGame(seed=42)
    game.start_new_game(target_value=1)  # Low target for predictable evidence

    # Play several rounds, keeping the most recent state.
    state = game.get_current_state()
    for _ in range(5):
        if game.is_game_finished():
            break
        state = game.play_round()

    # Beliefs should no longer be uniform (unless every roll was 'same').
    final_beliefs = game.belief_state.get_current_beliefs()
    assert not all(abs(b - 1/6) < 1e-10 for b in final_beliefs)

    outcomes = [ev.comparison_result for ev in state.evidence_history]

    # No roll can be below the minimum face, so 'lower' must never occur.
    assert "lower" not in outcomes

    # A 'higher' roll is impossible for the top face (P(roll > 6) = 0),
    # so any such observation eliminates target 6.
    if "higher" in outcomes:
        assert abs(game.belief_state.get_belief_for_target(6)) < 1e-10
|
| 305 |
+
|
| 306 |
+
def test_game_with_evidence_updates(self):
    """A manually injected 'higher' update shifts belief toward low targets."""
    game = BayesianGame(seed=42)
    game.start_new_game(target_value=3)

    # Feed one piece of evidence straight into the belief domain.
    from domains.belief.belief_domain import BeliefUpdate
    beliefs = game.belief_state
    beliefs.update_beliefs(BeliefUpdate(comparison_result="higher"))

    # Mirror the new best guess into the game state.
    game.game_state.most_likely_target = beliefs.get_most_likely_target()

    low_target_belief = beliefs.get_belief_for_target(1)
    high_target_belief = beliefs.get_belief_for_target(6)

    # After "higher", lower targets are the more plausible ones.
    assert low_target_belief > high_target_belief
    assert beliefs.get_most_likely_target() in range(1, 7)
    assert 0 <= game.get_final_guess_accuracy() <= 1
|
| 326 |
+
|
| 327 |
+
def test_reproducibility_with_seed(self):
    """Two games with the same seed and target produce identical evidence and beliefs."""
    def fresh_game():
        game = BayesianGame(seed=42)
        game.start_new_game(target_value=3)
        return game

    first, second = fresh_game(), fresh_game()

    for _ in range(5):
        if first.is_game_finished() or second.is_game_finished():
            break

        first_state = first.play_round()
        second_state = second.play_round()

        # Evidence histories must match item by item.
        first_history = first_state.evidence_history
        second_history = second_state.evidence_history
        assert len(first_history) == len(second_history)
        for left, right in zip(first_history, second_history):
            assert left.dice_roll == right.dice_roll
            assert left.comparison_result == right.comparison_result

        # And so must the resulting beliefs.
        assert first_state.current_beliefs == second_state.current_beliefs
|
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for the Gradio UI interface to ensure proper error handling and memory management.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
from ui.gradio_interface import GradioInterface
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TestGradioInterface:
    """Exercise the Gradio interface handlers and their return contract."""

    @staticmethod
    def _unpack_state(result):
        """Assert the 4-tuple handler contract and return its parts.

        Checks that ``result`` has exactly four items typed as
        ``(str, str, Figure, str)`` and hands them back for further checks.
        """
        assert len(result) == 4
        status, round_info, belief_chart, game_log = result

        assert isinstance(status, str)
        assert isinstance(round_info, str)
        assert isinstance(belief_chart, plt.Figure)
        assert isinstance(game_log, str)
        return status, round_info, belief_chart, game_log

    def test_interface_initialization(self):
        """A fresh interface owns a game with the default settings."""
        ui = GradioInterface()
        assert ui.game is not None
        assert ui.game.dice_sides == 6
        assert ui.game.max_rounds == 10

    def test_reset_game_returns_proper_types(self):
        """reset_game honours the handler return contract."""
        ui = GradioInterface()
        self._unpack_state(ui.reset_game(dice_sides=8, max_rounds=15))

    def test_start_new_game_valid_target(self):
        """A valid target puts the game into the playing phase."""
        ui = GradioInterface()
        status, _, _, _ = self._unpack_state(ui.start_new_game("3"))
        assert "Playing" in status

    def test_start_new_game_invalid_target(self):
        """An out-of-range target yields a typed error state."""
        ui = GradioInterface()
        outcome = ui.start_new_game("10")  # Invalid for 6-sided die
        status, _, _, _ = self._unpack_state(outcome)
        assert "❌" in status
        assert "between 1 and 6" in status

    def test_play_round_without_game_started(self):
        """Playing before starting a game yields a typed error state."""
        ui = GradioInterface()
        status, _, _, _ = self._unpack_state(ui.play_round())
        assert "❌" in status
        assert "not in playing phase" in status

    def test_play_round_normal_flow(self):
        """A round played after starting keeps the game in play."""
        ui = GradioInterface()

        # A game has to be running before a round can be played.
        ui.start_new_game("3")

        status, _, _, _ = self._unpack_state(ui.play_round())
        assert "Playing" in status

    def test_exceeding_max_rounds(self):
        """Playing past the round limit reports graceful completion."""
        ui = GradioInterface()

        # Two-round game, both rounds consumed.
        ui.reset_game(dice_sides=6, max_rounds=2)
        ui.start_new_game("3")
        ui.play_round()
        ui.play_round()

        # A third attempt must be refused with the completion message.
        status, _, _, _ = self._unpack_state(ui.play_round())
        assert "🏁" in status
        assert "completed" in status

    def test_create_empty_chart(self):
        """The error-state chart is a real figure."""
        ui = GradioInterface()
        figure = ui._create_empty_chart()

        assert isinstance(figure, plt.Figure)
        # Clean up
        plt.close(figure)

    def test_matplotlib_memory_management(self):
        """Repeated chart creation must not pile up open pyplot figures."""
        ui = GradioInterface()

        open_before = len(plt.get_fignums())

        for _ in range(5):
            ui._create_belief_chart()

        open_after = len(plt.get_fignums())

        # At most the most recent figure may still be registered.
        assert open_after <= open_before + 1

    def test_error_handling_preserves_types(self):
        """Every error path returns the same typed 4-tuple."""
        ui = GradioInterface()

        # Collect the results of several failing calls up front.
        failures = [
            ui.start_new_game("invalid_number"),
            ui.start_new_game("0"),
            ui.start_new_game("100"),
            ui.play_round(),  # No game started
        ]

        for failure in failures:
            status, _, figure, _ = self._unpack_state(failure)
            assert "❌" in status

            # Clean up the figure
            plt.close(figure)

    def test_game_log_creation(self):
        """The game log lists the evidence gathered so far."""
        ui = GradioInterface()
        ui.start_new_game("3")

        rounds_to_play = 3
        while rounds_to_play:
            ui.play_round()
            rounds_to_play -= 1

        _, _, figure, log_text = ui._get_interface_state()

        assert isinstance(log_text, str)
        assert "Evidence History" in log_text
        assert "Round" in log_text

        # Clean up
        plt.close(figure)

    def test_graceful_game_completion(self):
        """A completed game shows the comprehensive final results."""
        ui = GradioInterface()

        # Run a three-round game to the end.
        ui.reset_game(dice_sides=6, max_rounds=3)
        ui.start_new_game("4")
        for _ in range(3):
            ui.play_round()

        _, summary_text, figure, log_text = ui._get_interface_state()

        # Final summary sections.
        for fragment in (
            "Final Game Results",
            "Learning Performance",
            "Information gained",
        ):
            assert fragment in summary_text

        # Completion section of the log.
        assert "Game Completed" in log_text
        assert "Congratulations" in log_text or "Learning opportunity" in log_text
        assert "confidence in true target" in log_text

        assert isinstance(figure, plt.Figure)

        # Clean up
        plt.close(figure)

    def test_completion_state_preservation(self):
        """Playing after completion still returns the full final state."""
        ui = GradioInterface()

        # Finish a two-round game.
        ui.reset_game(dice_sides=6, max_rounds=2)
        ui.start_new_game("3")
        ui.play_round()
        ui.play_round()

        # One more attempt: the final information must still be there.
        status, summary_text, figure, log_text = ui.play_round()

        assert "🏁" in status
        assert "completed" in status
        assert len(summary_text) > 100  # Should have detailed final results
        assert len(log_text) > 50  # Should have complete evidence history
        assert isinstance(figure, plt.Figure)

        # Clean up
        plt.close(figure)
|
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# UI package initialization
|
|
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
from typing import Tuple, Dict, Any, Union
|
| 5 |
+
|
| 6 |
+
from domains.coordination.game_coordination import BayesianGame, GamePhase
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class GradioInterface:
    """Adapter that exposes a ``BayesianGame`` to the Gradio UI.

    Every public handler returns the same 4-tuple
    ``(status, round_info, belief_chart, game_log)`` so it can be bound
    directly to the four output components of the page.

    Charts are built as standalone ``Figure`` objects instead of going
    through pyplot's process-wide figure registry. Nothing is registered
    with pyplot, so there is nothing to leak and no need for
    ``plt.close('all')``, which would also close figures this class does
    not own.
    """

    # Glyph appended to each evidence line in the game log.
    _RESULT_EMOJI = {"higher": "⬆️", "lower": "⬇️", "same": "🎯"}

    def __init__(self):
        """Initialize the Gradio interface."""
        self.game = None
        self.reset_game()

    def reset_game(
        self, dice_sides: int = 6, max_rounds: int = 10
    ) -> Tuple[str, str, plt.Figure, str]:
        """Replace the current game with a new one built from the settings.

        Args:
            dice_sides: Number of sides on the dice
            max_rounds: Maximum number of rounds

        Returns:
            Tuple of (status, round_info, belief_chart, game_log)
        """
        self.game = BayesianGame(dice_sides=dice_sides, max_rounds=max_rounds)
        return self._get_interface_state()

    def start_new_game(self, target_value: str = "") -> Tuple[str, str, plt.Figure, str]:
        """Start a new game, optionally with a caller-chosen target.

        Args:
            target_value: Target as text; empty, whitespace-only or ``None``
                means no explicit target is passed to the game.

        Returns:
            Tuple of (status, round_info, belief_chart, game_log). When the
            text is not an integer, or the integer is outside
            ``1..dice_sides``, the status carries an error message and the
            chart is the error placeholder.
        """
        try:
            # ``or ""`` keeps a None input from blowing up on .strip().
            text = (target_value or "").strip()
            target = int(text) if text else None
            if target is not None and not (1 <= target <= self.game.dice_sides):
                return (
                    f"❌ Target value must be between 1 and {self.game.dice_sides}",
                    "",
                    self._create_empty_chart(),
                    "",
                )

            self.game.start_new_game(target_value=target)
            return self._get_interface_state()
        except ValueError as e:
            return f"❌ Error: {str(e)}", "", self._create_empty_chart(), ""

    def play_round(self) -> Tuple[str, str, plt.Figure, str]:
        """Play one round of the game.

        Returns:
            Tuple of (status, round_info, belief_chart, game_log). If the
            game is already finished the final state is returned with a
            completion status; if no game is in progress an error state is
            returned.
        """
        try:
            if self.game.is_game_finished():
                # Keep showing the final results; only the status line is
                # swapped for the "already completed" message.
                _, round_info, belief_chart, game_log = self._get_interface_state()
                return (
                    "🏁 Game completed! All rounds finished. Start a new game to play again.",
                    round_info,
                    belief_chart,
                    game_log,
                )

            if self.game.game_state.phase != GamePhase.PLAYING:
                return (
                    "❌ Game not in playing phase. Start a new game first.",
                    "",
                    self._create_empty_chart(),
                    "",
                )

            self.game.play_round()
            return self._get_interface_state()
        except ValueError as e:
            return f"❌ Error: {str(e)}", "", self._create_empty_chart(), ""

    def _get_interface_state(self) -> Tuple[str, str, plt.Figure, str]:
        """Render the current game state into the four UI outputs.

        Returns:
            Tuple of (status, round_info, belief_chart, game_log)
        """
        state = self.game.get_current_state()
        status = self._format_status(state)
        round_info = self._format_round_info(state)
        belief_chart = self._create_belief_chart()
        game_log = self._create_game_log()
        return status, round_info, belief_chart, game_log

    def _format_status(self, state) -> str:
        """Build the one-line status message for the given game state."""
        if state.phase == GamePhase.SETUP:
            return "🎯 Ready to start new game"
        if state.phase == GamePhase.PLAYING:
            return f"🎲 Playing - Round {state.round_number}/{state.max_rounds}"

        # Any remaining phase is handled as FINISHED.
        correct = "✅" if self.game.was_final_guess_correct() else "❌"
        accuracy = self.game.get_final_guess_accuracy()
        return (
            f"{correct} Game finished! Final guess: {state.most_likely_target} "
            f"(True: {state.target_value}) - Accuracy: {accuracy:.2f}"
        )

    def _format_round_info(self, state) -> str:
        """Build the Markdown panel describing the round or final results."""
        if state.target_value is None:
            return "Start a new game to see round information."

        if state.phase != GamePhase.FINISHED:
            # Game still running: show the live settings.
            return f"""
**Game Settings:**
- Target Value: {state.target_value} (hidden from Player 2)
- Most Likely Target: {state.most_likely_target}
- Belief Entropy: {state.belief_entropy:.2f} bits
- Round: {state.round_number}/{state.max_rounds}
"""

        summary = self.game.get_game_summary()
        final_correct = "✅ Correct!" if summary["guess_correct"] else "❌ Incorrect"
        # Entropy of a uniform distribution over all candidate targets.
        uniform_entropy = np.log2(len(state.current_beliefs))
        return f"""
**🏁 Final Game Results:**
- True Target: {state.target_value}
- Final Guess: {state.most_likely_target} {final_correct}
- Final Accuracy: {summary["final_accuracy"]:.3f} (probability assigned to true target)
- Final Entropy: {state.belief_entropy:.2f} bits
- Rounds Played: {state.round_number}/{state.max_rounds}
- Evidence Collected: {summary["evidence_count"]} pieces

**📊 Learning Performance:**
- Started with uniform beliefs (entropy: {uniform_entropy:.2f} bits)
- Ended with entropy: {state.belief_entropy:.2f} bits
- Information gained: {uniform_entropy - state.belief_entropy:.2f} bits
"""

    @staticmethod
    def _new_axes() -> Tuple[plt.Figure, Any]:
        """Create a standalone figure with one axes, outside pyplot's registry."""
        fig = plt.Figure(figsize=(10, 6))
        return fig, fig.subplots()

    @staticmethod
    def _draw_centered_message(ax, message: str, **text_kwargs) -> None:
        """Write ``message`` in the middle of ``ax`` on a unit-square canvas."""
        ax.text(
            0.5,
            0.5,
            message,
            transform=ax.transAxes,
            ha="center",
            va="center",
            fontsize=14,
            **text_kwargs,
        )
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)

    def _create_belief_chart(self) -> plt.Figure:
        """Create belief distribution chart.

        Returns:
            Matplotlib figure with one bar per candidate target. The most
            likely target is drawn in orange and the true target gets a red
            outline. When there are no beliefs yet, a placeholder message is
            shown instead.
        """
        fig, ax = self._new_axes()
        game_state = self.game.game_state
        beliefs = game_state.current_beliefs

        if not beliefs:
            self._draw_centered_message(ax, "Start a game to see beliefs")
            fig.tight_layout()
            return fig

        likely = game_state.most_likely_target
        truth = game_state.target_value

        # Targets are 1-based, bar indices 0-based.
        targets = list(range(1, len(beliefs) + 1))
        bars = ax.bar(targets, beliefs, alpha=0.7, color="skyblue", edgecolor="navy")

        if likely:
            bars[likely - 1].set_color("orange")
            bars[likely - 1].set_alpha(1.0)

        if truth:
            bars[truth - 1].set_edgecolor("red")
            bars[truth - 1].set_linewidth(3)

        ax.set_xlabel("Target Value")
        ax.set_ylabel("Belief Probability")

        if game_state.phase == GamePhase.FINISHED:
            correct_indicator = "✅" if self.game.was_final_guess_correct() else "❌"
            ax.set_title(f"Final Belief Distribution {correct_indicator}")
        else:
            ax.set_title("Player 2's Belief Distribution")

        ax.set_xticks(targets)
        ax.set_ylim(0, 1)
        ax.grid(True, alpha=0.3)

        # Proxy patches so the legend explains the two highlight styles.
        legend_elements = []
        if likely:
            legend_elements.append(
                plt.Rectangle((0, 0), 1, 1, fc="orange", alpha=1.0, label="Most Likely")
            )
        if truth:
            legend_elements.append(
                plt.Rectangle(
                    (0, 0), 1, 1, fc="skyblue", ec="red", lw=3, label="True Target"
                )
            )
        if legend_elements:
            ax.legend(handles=legend_elements)

        fig.tight_layout()
        return fig

    def _create_empty_chart(self) -> plt.Figure:
        """Create an empty chart for error states.

        Returns:
            Matplotlib figure with error message
        """
        fig, ax = self._new_axes()
        self._draw_centered_message(ax, "Error: Unable to display chart", color="red")
        ax.set_title("Chart Error")
        fig.tight_layout()
        return fig

    def _create_game_log(self) -> str:
        """Create game log showing evidence history.

        Returns:
            Markdown text with one line per piece of evidence, followed by a
            completion summary once the game is finished.
        """
        game_state = self.game.game_state
        if not game_state.evidence_history:
            return "No evidence yet. Start playing rounds to see the log."

        log_lines = ["**Evidence History:**\n"]

        for i, evidence in enumerate(game_state.evidence_history, 1):
            emoji = self._RESULT_EMOJI[evidence.comparison_result]
            log_lines.append(
                f"Round {i}: Rolled {evidence.dice_roll} → {evidence.comparison_result} {emoji}"
            )

        if game_state.phase == GamePhase.FINISHED:
            log_lines.append("")
            log_lines.append("**🏁 Game Completed!**")

            if self.game.was_final_guess_correct():
                log_lines.append("🎉 **Congratulations!** Player 2 correctly identified the target!")
            else:
                log_lines.append("📈 **Learning opportunity!** Player 2's beliefs converged but missed the target.")

            # Grade the outcome by the probability placed on the true target.
            final_accuracy = self.game.get_final_guess_accuracy()
            if final_accuracy > 0.5:
                log_lines.append(f"🎯 Strong evidence: {final_accuracy:.1%} confidence in true target")
            elif final_accuracy > 0.3:
                log_lines.append(f"🤔 Moderate evidence: {final_accuracy:.1%} confidence in true target")
            else:
                log_lines.append(f"🌫️ Conflicting evidence: Only {final_accuracy:.1%} confidence in true target")

        return "\n".join(log_lines)
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
def create_interface() -> gr.Blocks:
    """Build the Gradio app for the Bayesian Game.

    Lays out the controls and output panels, wires the buttons to the
    ``GradioInterface`` handlers and registers an initial render on page
    load. The app is returned without being launched.

    Returns:
        The configured ``gr.Blocks`` application.
    """
    # NOTE(review): one GradioInterface (and therefore one game) is captured
    # by every handler below, so all visitors appear to share the same game
    # state. Confirm this is intended; otherwise keep it in gr.State.
    interface = GradioInterface()

    with gr.Blocks(title="Bayesian Game", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🎲 Bayesian Game")
        gr.Markdown(
            """
**Game Rules:**
- Judge and Player 1 can see the target die value
- Player 2 must deduce the target value using Bayesian inference
- Each round: Player 1 rolls dice and reports "higher"/"lower"/"same" compared to target
- Game runs for a specified number of rounds
"""
        )

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Game Controls")

                with gr.Row():
                    dice_sides = gr.Number(
                        value=6, label="Dice Sides", minimum=2, maximum=20, precision=0
                    )
                    max_rounds = gr.Number(
                        value=10, label="Max Rounds", minimum=1, maximum=50, precision=0
                    )

                reset_btn = gr.Button("🔄 Reset Game", variant="secondary")

                target_input = gr.Textbox(
                    label="Target Value (optional)",
                    placeholder="Leave empty for random target",
                    max_lines=1,
                )
                start_btn = gr.Button("🎯 Start New Game", variant="primary")
                play_btn = gr.Button("🎲 Play Round", variant="secondary")

            with gr.Column(scale=2):
                status_output = gr.Textbox(label="Game Status", interactive=False)
                round_info = gr.Markdown("Start a new game to begin.")

        with gr.Row():
            with gr.Column():
                belief_plot = gr.Plot(label="Belief Distribution")
            with gr.Column():
                game_log = gr.Markdown("Game log will appear here.")

        # Every handler returns (status, round_info, belief_chart, game_log),
        # so all events update the same four components.
        state_outputs = [status_output, round_info, belief_plot, game_log]

        reset_btn.click(
            interface.reset_game,
            inputs=[dice_sides, max_rounds],
            outputs=state_outputs,
        )

        start_btn.click(
            interface.start_new_game,
            inputs=[target_input],
            outputs=state_outputs,
        )

        play_btn.click(
            interface.play_round,
            outputs=state_outputs,
        )

        # Render the current state as soon as the page loads.
        demo.load(
            interface._get_interface_state,
            outputs=state_outputs,
        )

    return demo
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
if __name__ == "__main__":
    # Script entry point: build the app, then start serving it.
    demo = create_interface()
    demo.launch()
|