Upload 15 files
Browse files- .gitattributes +1 -0
- UNPACK/README.md +636 -0
- UNPACK/cocoon_drone_adapter.py +712 -0
- UNPACK/cocoon_drone_arena.py +0 -0
- UNPACK/cocoon_tmrl_adapter.py +1724 -0
- UNPACK/curriculum/connector_words.json +61 -0
- UNPACK/curriculum/dialogue_frames.json +48 -0
- UNPACK/curriculum/game_language_tasks.json +50 -0
- UNPACK/curriculum/reward_rubric.json +25 -0
- UNPACK/curriculum/role_transform_tasks.json +29 -0
- UNPACK/jsbsim_quadcopter.py +1141 -0
- UNPACK/metadata.json +144 -0
- UNPACK/requirements.txt +22 -0
- UNPACK/training_logs/schema.json +33 -0
- UNPACK/vocabulary.json +0 -0
- UNPACK/work!.py +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
UNPACK/work!.py filter=lfs diff=lfs merge=lfs -text
|
UNPACK/README.md
ADDED
|
@@ -0,0 +1,636 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🦋 Butterfly Cocoon - Standalone Agent
|
| 2 |
+
|
| 3 |
+
**Generated:** 2026-05-06T06:46:03.658300
|
| 4 |
+
**Mode:** ENSEMBLE (107 organisms)
|
| 5 |
+
**Template Size:** 364,349,579 chars (code only)
|
| 6 |
+
**Classes:** 15 (Neural + Language + Memory + Knowledge + VP)
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## 🧬 Formation Fingerprint
|
| 10 |
+
|
| 11 |
+
This cocoon's emergent history - how these organisms came to be:
|
| 12 |
+
|
| 13 |
+
**Fitness:** min=0.7559, max=0.7559, mean=0.7559
|
| 14 |
+
|
| 15 |
+
**Events Witnessed:** 38,156 total
|
| 16 |
+
**Top Event Types:** neural_decision (23081), alliance_event_recorded (3117), highlander_organism_registered (2000), alliance_alliance_dissolved (1969), alliance_member_left (1624)
|
| 17 |
+
|
| 18 |
+
**Alliance Landscape:** 591 total alliances
|
| 19 |
+
- Alliance `alliance_1_9e44_615f` (tier 1, 2 members)
|
| 20 |
+
- Alliance `alliance_1_cff0_d9d7` (tier 1, 2 members)
|
| 21 |
+
- Alliance `alliance_1_5712_567c` (tier 1, 2 members)
|
| 22 |
+
|
| 23 |
+
**Simulation Snapshot:**
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
## 🧠 Neural Topology Visualization
|
| 28 |
+
|
| 29 |
+
**[📊 Open Interactive Topology Viewer](ensemble_topology.html)**
|
| 30 |
+
|
| 31 |
+
The topology visualization provides:
|
| 32 |
+
- **Per-organism layers** - Toggle individual neural networks on/off
|
| 33 |
+
- **Overlay mode** - See all organisms' architectures superimposed
|
| 34 |
+
- **Stacked mode** - View organisms in horizontal strips
|
| 35 |
+
- **Grid mode** - Compare organisms side-by-side
|
| 36 |
+
- **Color-coded neurons** - Input (cyan), Hidden (magenta), Output (yellow), Language (green)
|
| 37 |
+
|
| 38 |
+
*Open the HTML file in a browser for the full interactive experience.*
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## 🧠 What's Inside
|
| 45 |
+
|
| 46 |
+
This is a **MONOLITHIC** cocoon - a completely self-contained Python file with:
|
| 47 |
+
|
| 48 |
+
**Organisms:**
|
| 49 |
+
- `edbc366172639024`
|
| 50 |
+
- `86d78ecb17378ff1`
|
| 51 |
+
- `cd2e3d9e8344e077`
|
| 52 |
+
- `f585fb9f20bb0729`
|
| 53 |
+
- `951c9f843b0d9243`
|
| 54 |
+
- `fd5dbc8866ea1bde`
|
| 55 |
+
- `43ddb19a041390c6`
|
| 56 |
+
- `58f7850cc2ed618d`
|
| 57 |
+
- `c79f68de668b36e3`
|
| 58 |
+
- `81323964002dba96`
|
| 59 |
+
- `b168fd01c96dd355`
|
| 60 |
+
- `43d8288b2748e1bf`
|
| 61 |
+
- `9e6e0b030a372015`
|
| 62 |
+
- `9dc419a36357d7a7`
|
| 63 |
+
- `c1f6f11bfbc53479`
|
| 64 |
+
- `5a584dd72a843b1b`
|
| 65 |
+
- `449d555f97089ff4`
|
| 66 |
+
- `fbeb2853dc105919`
|
| 67 |
+
- `30c6b10eadcdc3e9`
|
| 68 |
+
- `7798509f4e099717`
|
| 69 |
+
- `9674ac0a0b07650a`
|
| 70 |
+
- `fab689bcb08d3e58`
|
| 71 |
+
- `93c892a86a589860`
|
| 72 |
+
- `d70097c35b0242c8`
|
| 73 |
+
- `2e0397589f23af91`
|
| 74 |
+
- `858f84cc6270de47`
|
| 75 |
+
- `df6a436351b53474`
|
| 76 |
+
- `646348e1be52244f`
|
| 77 |
+
- `589802d5746181db`
|
| 78 |
+
- `c11c5b0df4de0a37`
|
| 79 |
+
- `04649226ae9efebb`
|
| 80 |
+
- `e8173306bdfd4c13`
|
| 81 |
+
- `78870f7003517a3a`
|
| 82 |
+
- `6d89bac8dbcfd59c`
|
| 83 |
+
- `f4bddc2f5be6686e`
|
| 84 |
+
- `33a5293e4c3ac3cf`
|
| 85 |
+
- `31d897dc0cafa21a`
|
| 86 |
+
- `3414fcd46bc6c66d`
|
| 87 |
+
- `c5109ee5294e4a7e`
|
| 88 |
+
- `e547dad6892d4c45`
|
| 89 |
+
- `2a0a04b7921a1671`
|
| 90 |
+
- `92a453e86e1e0e0e`
|
| 91 |
+
- `2df24a997db6d851`
|
| 92 |
+
- `1345cbbcf514c715`
|
| 93 |
+
- `62a276d820a94e68`
|
| 94 |
+
- `417bfd09dbf06bf4`
|
| 95 |
+
- `c55fa8f9abd047f1`
|
| 96 |
+
- `821db11ec8e1952a`
|
| 97 |
+
- `2a86a4de18d7a088`
|
| 98 |
+
- `a4b6929eb93343bf`
|
| 99 |
+
- `56e76c222a39c0e3`
|
| 100 |
+
- `98aa5e6a4b474acc`
|
| 101 |
+
- `b5c7ef0643d91c56`
|
| 102 |
+
- `819596e8f6ee7600`
|
| 103 |
+
- `8cda83a3997f0c31`
|
| 104 |
+
- `55256341f7b9af24`
|
| 105 |
+
- `1438f196417bdb0b`
|
| 106 |
+
- `277a3319b1c4cf53`
|
| 107 |
+
- `567cf59af9f137b4`
|
| 108 |
+
- `4cfaddc9dce4a5f7`
|
| 109 |
+
- `b9d3440251c48761`
|
| 110 |
+
- `2e2121ad1c57593f`
|
| 111 |
+
- `24e7cd88b78393da`
|
| 112 |
+
- `a2f1a9edae3711f6`
|
| 113 |
+
- `0b58d859da8c0b02`
|
| 114 |
+
- `f42be2fb7c734fe8`
|
| 115 |
+
- `9e44f76626a0bd6d`
|
| 116 |
+
- `745d97256adcdbde`
|
| 117 |
+
- `d9d7efccd4f56acb`
|
| 118 |
+
- `b7d80845618bc5ae`
|
| 119 |
+
- `c988215ab0ae0567`
|
| 120 |
+
- `68849731ee30a5db`
|
| 121 |
+
- `5e971e526a546789`
|
| 122 |
+
- `b340af532366cc7c`
|
| 123 |
+
- `59a4a010bd57af65`
|
| 124 |
+
- `ca01f4181bf90a0d`
|
| 125 |
+
- `c0a3093a306aa9f6`
|
| 126 |
+
- `f6fa3568de13430c`
|
| 127 |
+
- `f558482357ee27fc`
|
| 128 |
+
- `f0b599001944f186`
|
| 129 |
+
- `9c71e95851243c24`
|
| 130 |
+
- `6e924f6134d2fe59`
|
| 131 |
+
- `8c09eb8977720979`
|
| 132 |
+
- `1fa598a907e91802`
|
| 133 |
+
- `08fdaf4d05ac65a8`
|
| 134 |
+
- `731939b8691bdfc0`
|
| 135 |
+
- `ffdb2164fe3eefb0`
|
| 136 |
+
- `615fe8569ce56dba`
|
| 137 |
+
- `787ea58fca362124`
|
| 138 |
+
- `6e8090766e191505`
|
| 139 |
+
- `221ec40b2bed240d`
|
| 140 |
+
- `c38a656005161d6d`
|
| 141 |
+
- `4bf524bf5dd7ca28`
|
| 142 |
+
- `b40ff22aa6b46340`
|
| 143 |
+
- `a8ed3e3b9df0d23b`
|
| 144 |
+
- `f57ad03fba4f1062`
|
| 145 |
+
- `1141890b4a500eb1`
|
| 146 |
+
- `90c2b87c11e71a49`
|
| 147 |
+
- `4ce5894e48795ae6`
|
| 148 |
+
- `0a7244228613e835`
|
| 149 |
+
- `392c4f9ffcb97860`
|
| 150 |
+
- `5ee9a85dbd894e10`
|
| 151 |
+
- `8ffa19fbf9e1caec`
|
| 152 |
+
- `96195a384b90b4ca`
|
| 153 |
+
- `73a3c676059a4d06`
|
| 154 |
+
- `300e99a67053e897`
|
| 155 |
+
- `47cd3c24adc3b8c2`
|
| 156 |
+
|
| 157 |
+
**Embedded Subsystems:**
|
| 158 |
+
|
| 159 |
+
| Subsystem | Purpose | Continued Learning |
|
| 160 |
+
|-----------|---------|-------------------|
|
| 161 |
+
| `OrganismBrain` | Neural network (action + language) | ✅ Yes - weights updated via backprop |
|
| 162 |
+
| `HopfieldLayer` | Iterative thought refinement (energy-based) | ✅ Yes - pattern memory learns |
|
| 163 |
+
| `MultiHeadAttention` | VP-aware self-attention | ✅ Yes - attention weights updated |
|
| 164 |
+
| `AtomicLanguageSystem` | Semantic units with emotion/context | ✅ Yes - atoms can be created/reinforced |
|
| 165 |
+
| `ConversationHistory` | Topic tracking & context memory | ✅ Yes - grows with each conversation |
|
| 166 |
+
| `EnhancedKnowledgeWeb` | Semantic relations between concepts | ✅ Yes - relations added/strengthened |
|
| 167 |
+
| `VPRuntime` | Self-regulation (Vigilance × Plasticity) | ✅ Yes - adapts from state |
|
| 168 |
+
| `ExperienceBuffer` | Learning from past experiences | ✅ Yes - buffer grows with experience |
|
| 169 |
+
| `SphereArena` | 3D swarm defense training game | ✅ Yes - organisms learn during play |
|
| 170 |
+
|
| 171 |
+
**Embedded Data:**
|
| 172 |
+
- Neural weights (Base64-encoded PyTorch state dicts)
|
| 173 |
+
- Vocabulary (token↔id mapping)
|
| 174 |
+
- Atomic language corpus (if available)
|
| 175 |
+
- Conversation history (if available)
|
| 176 |
+
|
| 177 |
+
---
|
| 178 |
+
|
| 179 |
+
## 🔥 Continued Learning
|
| 180 |
+
|
| 181 |
+
**YES, this cocoon supports continued learning!**
|
| 182 |
+
|
| 183 |
+
The cocoon.py file contains full PyTorch modules that can continue training:
|
| 184 |
+
|
| 185 |
+
1. **Full PyTorch modules** - can call `backward()` and update weights from the resulting gradients
|
| 186 |
+
2. **ExperienceBuffer** - stores (state, action, reward) tuples for replay
|
| 187 |
+
3. **AtomicLanguageSystem** - creates new semantic atoms from conversations
|
| 188 |
+
4. **EnhancedKnowledgeWeb** - grows semantic relations as concepts connect
|
| 189 |
+
5. **ConversationHistory** - accumulates context over time
|
| 190 |
+
|
| 191 |
+
```python
|
| 192 |
+
# The agent learns from every interaction:
|
| 193 |
+
agent = CocoonAgent()
|
| 194 |
+
action, output = agent.get_action(state) # Updates VP, stores experience
|
| 195 |
+
agent.atomic_lang.create_atom("new_concept", "definition", emotion=0.8) # Creates new atom
|
| 196 |
+
agent.knowledge_web.add_relation("concept_a", "concept_b", "related_to", strength=0.9) # Grows web
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
**Export Comparison:**
|
| 200 |
+
|
| 201 |
+
| Format | File | Learning | Subsystems | Portability |
|
| 202 |
+
|--------|------|----------|------------|-------------|
|
| 203 |
+
| `cocoon.py` | Python source | ✅ Full (neural + symbolic) | ✅ All | Python only |
|
| 204 |
+
| `.pt` | TorchScript | ✅ Neural only* | ❌ None | PyTorch/LibTorch/C++ |
|
| 205 |
+
| `.onnx` | ONNX model | ❌ Inference only | ❌ None | Universal (C++, JS, Rust) |
|
| 206 |
+
| `.statedict` | Weights only | ✅ Loadable | ❌ None | PyTorch |
|
| 207 |
+
|
| 208 |
+
*TorchScript (.pt) **CAN** continue learning! Load with `torch.jit.load()`, call `.train()`, run backward pass.
|
| 209 |
+
However, it only contains the neural network - no AtomicLanguageSystem, KnowledgeWeb, or other symbolic subsystems.
|
| 210 |
+
|
| 211 |
+
**Fine-tuning a TorchScript model:**
|
| 212 |
+
```python
|
| 213 |
+
import torch
|
| 214 |
+
|
| 215 |
+
# Load the exported TorchScript model
|
| 216 |
+
model = torch.jit.load("brain_ensemble.pt")
|
| 217 |
+
model.train()
|
| 218 |
+
|
| 219 |
+
# Fine-tune on new data
|
| 220 |
+
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
|
| 221 |
+
for state, target in new_training_data:
|
| 222 |
+
    optimizer.zero_grad()
|
| 223 |
+
    output = model(state)
|
| 224 |
+
    loss = criterion(output, target)
|
| 225 |
+
    loss.backward()
|
| 226 |
+
    optimizer.step()
|
| 227 |
+
|
| 228 |
+
# Save updated model
|
| 229 |
+
torch.jit.save(model, "brain_finetuned.pt")
|
| 230 |
+
```
|
| 231 |
+
|
| 232 |
+
---
|
| 233 |
+
|
| 234 |
+
## 🚀 Quick Start
|
| 235 |
+
|
| 236 |
+
```bash
|
| 237 |
+
# View cocoon info
|
| 238 |
+
python cocoon.py --mode info
|
| 239 |
+
|
| 240 |
+
# Start chatting
|
| 241 |
+
python cocoon.py --mode chat
|
| 242 |
+
|
| 243 |
+
# Play games
|
| 244 |
+
python cocoon.py --mode gym --env CartPole-v1
|
| 245 |
+
|
| 246 |
+
# 3D sphere arena
|
| 247 |
+
python cocoon.py --mode sphere --train
|
| 248 |
+
|
| 249 |
+
# 🛸 Drone warfare (extract adapter first)
|
| 250 |
+
python cocoon.py --unpack ./my_cocoon
|
| 251 |
+
python cocoon_drone_adapter.py --mode tag_battle
|
| 252 |
+
```
|
| 253 |
+
|
| 254 |
+
---
|
| 255 |
+
|
| 256 |
+
## 📚 Complete Command Reference
|
| 257 |
+
|
| 258 |
+
### Mode Selection
|
| 259 |
+
|
| 260 |
+
| Mode | Command | Description |
|
| 261 |
+
|------|---------|-------------|
|
| 262 |
+
| **info** | `python cocoon.py --mode info` | Show organism metadata, vocabulary, architecture (default) |
|
| 263 |
+
| **chat** | `python cocoon.py --mode chat` | Interactive conversation with learning |
|
| 264 |
+
| **gym** | `python cocoon.py --mode gym` | Train/test in Gymnasium environments |
|
| 265 |
+
| **serve** | `python cocoon.py --mode serve` | HTTP API server |
|
| 266 |
+
| **sphere** | `python cocoon.py --mode sphere` | 3D Sphere Arena swarm defense |
|
| 267 |
+
| **link** | `python cocoon.py --mode link` | P2P networking for cocoon battles |
|
| 268 |
+
| **drone** | `python cocoon_drone_adapter.py` | 🛸 Drone warfare arena (companion script) |
|
| 269 |
+
|
| 270 |
+
---
|
| 271 |
+
|
| 272 |
+
### 💬 Chat Mode
|
| 273 |
+
|
| 274 |
+
Interactive conversation with the neural organisms. Learns from every interaction.
|
| 275 |
+
|
| 276 |
+
```bash
|
| 277 |
+
python cocoon.py --mode chat
|
| 278 |
+
python cocoon.py --mode chat --verbose
|
| 279 |
+
```
|
| 280 |
+
|
| 281 |
+
**In-Chat Commands:**
|
| 282 |
+
|
| 283 |
+
| Command | Description |
|
| 284 |
+
|---------|-------------|
|
| 285 |
+
| `quit` | Exit chat mode |
|
| 286 |
+
| `export <file.py>` | Save current state to new cocoon file |
|
| 287 |
+
|
| 288 |
+
---
|
| 289 |
+
|
| 290 |
+
### 🌐 Sphere Arena (3D Training)
|
| 291 |
+
|
| 292 |
+
Swarm defense game where organisms cooperate to catch falling balls.
|
| 293 |
+
|
| 294 |
+
| Command | Description |
|
| 295 |
+
|---------|-------------|
|
| 296 |
+
| `python cocoon.py --mode sphere` | Play sphere defense |
|
| 297 |
+
| `python cocoon.py --mode sphere --train` | Play + learn from experience |
|
| 298 |
+
| `python cocoon.py --mode sphere --demo` | Preview with dummy AI |
|
| 299 |
+
| `python cocoon.py --mode sphere --headless` | Train without display |
|
| 300 |
+
| `python cocoon.py --mode sphere --balls 3 --train` | Multi-ball training |
|
| 301 |
+
| `python cocoon.py --mode sphere --misses 5 --train` | Harder difficulty |
|
| 302 |
+
|
| 303 |
+
**Sphere Arena Flags:**
|
| 304 |
+
|
| 305 |
+
| Flag | Default | Description |
|
| 306 |
+
|------|---------|-------------|
|
| 307 |
+
| `--balls N` | 1 | Number of balls (1-5) |
|
| 308 |
+
| `--misses N` | 10 | Max collective misses before game over |
|
| 309 |
+
| `--train` | off | Enable post-snapshot training |
|
| 310 |
+
| `--demo` | off | Run with dummy AI for preview |
|
| 311 |
+
| `--headless` | off | No display (training only) |
|
| 312 |
+
| `--verbose` | off | Verbose debug logging |
|
| 313 |
+
|
| 314 |
+
---
|
| 315 |
+
|
| 316 |
+
### 🛸 Drone Warfare Arena (Companion Script)
|
| 317 |
+
|
| 318 |
+
NASA JSBSim-grade drone combat simulation. **Complete system embedded - extract with --unpack.**
|
| 319 |
+
|
| 320 |
+
**Setup:**
|
| 321 |
+
```bash
|
| 322 |
+
python cocoon.py --unpack ./my_cocoon # Extracts full drone suite:
|
| 323 |
+
# - cocoon_drone_adapter.py (main entry point)
|
| 324 |
+
# - cocoon_drone_arena.py (8-mode arena)
|
| 325 |
+
# - jsbsim_quadcopter.py (6-DOF physics)
|
| 326 |
+
cd my_cocoon
|
| 327 |
+
python cocoon_drone_adapter.py # Run the adapter
|
| 328 |
+
```
|
| 329 |
+
|
| 330 |
+
| Command | Description |
|
| 331 |
+
|---------|-------------|
|
| 332 |
+
| `python cocoon_drone_adapter.py` | Interactive mode picker |
|
| 333 |
+
| `python cocoon_drone_adapter.py --mode free_fly` | Basic flight training |
|
| 334 |
+
| `python cocoon_drone_adapter.py --mode tag_battle` | Combat: tag enemies |
|
| 335 |
+
| `python cocoon_drone_adapter.py --mode survival` | Last drone flying wins |
|
| 336 |
+
| `python cocoon_drone_adapter.py --all` | Run all 8 modes |
|
| 337 |
+
| `python cocoon_drone_adapter.py --visual` | 3D visualization (requires PyFlyt) |
|
| 338 |
+
|
| 339 |
+
**Game Modes:** `free_fly`, `formation`, `pursuit`, `tag_battle`, `zone_control`, `capture_flag`, `survival`, `escort`
|
| 340 |
+
|
| 341 |
+
**Requirements:** `pip install numpy matplotlib` (PyFlyt optional: `pip install PyFlyt`)
|
| 342 |
+
|
| 343 |
+
---
|
| 344 |
+
|
| 345 |
+
### 🎮 Gymnasium Environments
|
| 346 |
+
|
| 347 |
+
**Built-in (always available):**
|
| 348 |
+
|
| 349 |
+
| Command | Description |
|
| 350 |
+
|---------|-------------|
|
| 351 |
+
| `python cocoon.py --mode gym --env CartPole-v1` | Classic pole balancing |
|
| 352 |
+
| `python cocoon.py --mode gym --env MountainCar-v0` | Drive up hill |
|
| 353 |
+
| `python cocoon.py --mode gym --env Acrobot-v1` | Double pendulum |
|
| 354 |
+
| `python cocoon.py --mode gym --env FrozenLake-v1` | Navigate slippery ice |
|
| 355 |
+
| `python cocoon.py --mode gym --env Taxi-v3` | Pickup & delivery |
|
| 356 |
+
| `python cocoon.py --mode gym --env Blackjack-v1` | Beat the dealer |
|
| 357 |
+
|
| 358 |
+
**Atari (`pip install ale-py`):**
|
| 359 |
+
- `ALE/Pong-v5`, `ALE/Breakout-v5`, `ALE/SpaceInvaders-v5`
|
| 360 |
+
|
| 361 |
+
**MuJoCo (`pip install gymnasium[mujoco]`):**
|
| 362 |
+
- `Ant-v4`, `HalfCheetah-v4`
|
| 363 |
+
|
| 364 |
+
**Gym Flags:**
|
| 365 |
+
|
| 366 |
+
| Flag | Default | Description |
|
| 367 |
+
|------|---------|-------------|
|
| 368 |
+
| `--env NAME` | CartPole-v1 | Gymnasium environment name |
|
| 369 |
+
| `--episodes N` | 100 | Number of episodes to run |
|
| 370 |
+
| `--render` | off | Show visual window |
|
| 371 |
+
| `--no-learn` | off | Disable online learning (inference only) |
|
| 372 |
+
|
| 373 |
+
---
|
| 374 |
+
|
| 375 |
+
### 🏎️ TrackMania 2020 (TMRL Integration)
|
| 376 |
+
|
| 377 |
+
Drive TrackMania 2020 with your cocoon organisms using the embedded TMRL adapter!
|
| 378 |
+
|
| 379 |
+
**Requirements:**
|
| 380 |
+
1. TrackMania 2020 (Ubisoft/Epic)
|
| 381 |
+
2. OpenPlanet plugin installed (openplanet.dev)
|
| 382 |
+
3. TMRL Python package: `pip install tmrl`
|
| 383 |
+
4. Extract `cocoon_tmrl_adapter.py` via `--unpack`
|
| 384 |
+
|
| 385 |
+
**Quick Start:**
|
| 386 |
+
```bash
|
| 387 |
+
# Extract adapter from cocoon
|
| 388 |
+
python cocoon.py --unpack ./my_tmrl
|
| 389 |
+
|
| 390 |
+
# Run the adapter
|
| 391 |
+
python cocoon_tmrl_adapter.py --cocoon path/to/cocoon.py --drive --episodes 4
|
| 392 |
+
```
|
| 393 |
+
|
| 394 |
+
**Important:**
|
| 395 |
+
- Play on the **"tmrl-test"** track for proper rewards (search in TrackMania)
|
| 396 |
+
- The adapter uses LIDAR observations + speed data
|
| 397 |
+
- Ensembles use majority voting for actions
|
| 398 |
+
|
| 399 |
+
**TMRL Adapter Commands:**
|
| 400 |
+
|
| 401 |
+
| Flag | Description |
|
| 402 |
+
|------|-------------|
|
| 403 |
+
| `--drive` | Inference mode (watch it play) |
|
| 404 |
+
| `--train` | Learning mode (organisms improve) |
|
| 405 |
+
| `--episodes N` | Number of races to run |
|
| 406 |
+
| `--organism N` | Use specific organism (0 = ensemble) |
|
| 407 |
+
|
| 408 |
+
---
|
| 409 |
+
|
| 410 |
+
### 🌐 HTTP API Server
|
| 411 |
+
|
| 412 |
+
```bash
|
| 413 |
+
python cocoon.py --mode serve --port 8080
|
| 414 |
+
```
|
| 415 |
+
|
| 416 |
+
**Endpoints:**
|
| 417 |
+
|
| 418 |
+
| Method | Endpoint | Description |
|
| 419 |
+
|--------|----------|-------------|
|
| 420 |
+
| `GET` | `/health` | Health check - returns organism count |
|
| 421 |
+
| `POST` | `/act` | Get action for state vector |
|
| 422 |
+
| `POST` | `/learn` | Add experience + train step |
|
| 423 |
+
| `POST` | `/chat` | Chat with learning (returns all organism responses) |
|
| 424 |
+
| `POST` | `/teach` | Teach new words/concepts |
|
| 425 |
+
| `GET` | `/vocab` | Get current vocabulary |
|
| 426 |
+
| `GET` | `/curriculum` | Get staged language curriculum and reward rubric |
|
| 427 |
+
| `GET` | `/training/logs` | Get recent post-export learning traces |
|
| 428 |
+
| `POST` | `/curriculum/score` | Submit outside coach reward score |
|
| 429 |
+
|
| 430 |
+
**Example `/chat` request:**
|
| 431 |
+
```bash
|
| 432 |
+
curl -X POST http://localhost:8080/chat \
|
| 433 |
+
-H "Content-Type: application/json" \
|
| 434 |
+
-d '{"prompt": "Hello!", "learn": true}'
|
| 435 |
+
```
|
| 436 |
+
|
| 437 |
+
---
|
| 438 |
+
|
| 439 |
+
### 🔗 Link Mode (P2P Networking)
|
| 440 |
+
|
| 441 |
+
Connect to other cocoons for battles and chat.
|
| 442 |
+
|
| 443 |
+
```bash
|
| 444 |
+
python cocoon.py --mode link --hatch ws://server:9000 --name "Champion"
|
| 445 |
+
```
|
| 446 |
+
|
| 447 |
+
**Link Mode Flags:**
|
| 448 |
+
|
| 449 |
+
| Flag | Default | Description |
|
| 450 |
+
|------|---------|-------------|
|
| 451 |
+
| `--hatch URL` | ws://localhost:9000 | CocoonHatch relay server URL |
|
| 452 |
+
| `--name NAME` | auto | Display name |
|
| 453 |
+
|
| 454 |
+
**In-Link Commands:**
|
| 455 |
+
|
| 456 |
+
| Command | Description |
|
| 457 |
+
|---------|-------------|
|
| 458 |
+
| `/users` | List online cocoons |
|
| 459 |
+
| `/challenge <name>` | Challenge a user to battle |
|
| 460 |
+
| `/accept <id>` | Accept a challenge |
|
| 461 |
+
| `/decline <id>` | Decline a challenge |
|
| 462 |
+
| `/chat <message>` | Send message to lobby |
|
| 463 |
+
| `/quit` | Disconnect |
|
| 464 |
+
|
| 465 |
+
**Requirements:** `pip install websockets`
|
| 466 |
+
|
| 467 |
+
---
|
| 468 |
+
|
| 469 |
+
### 🔬 Export & Conversion
|
| 470 |
+
|
| 471 |
+
| Command | Description |
|
| 472 |
+
|---------|-------------|
|
| 473 |
+
| `python cocoon.py --export evolved.py` | Export updated cocoon with learned state |
|
| 474 |
+
| `python cocoon.py --export-onnx brain.onnx` | Export to ONNX (all brains as ensemble) |
|
| 475 |
+
| `python cocoon.py --export-torchscript brain.pt` | Export to TorchScript (all brains as ensemble) |
|
| 476 |
+
| `python cocoon.py --export-onnx brain.onnx --organism 0` | Export single organism to ONNX |
|
| 477 |
+
| `python cocoon.py --export-torchscript brain.pt --organism 0` | Export single organism to TorchScript |
|
| 478 |
+
| `python cocoon.py --export-package ./my_model` | Export full package (ONNX + README + metadata) |
|
| 479 |
+
| `python cocoon.py --unpack ./output_dir` | Unpack ultimate package assets |
|
| 480 |
+
| `python cocoon.py --readme` | Print embedded README and exit |
|
| 481 |
+
|
| 482 |
+
**TorchScript vs ONNX:**
|
| 483 |
+
| Format | Continued Learning | Portability | Best For |
|
| 484 |
+
|--------|-------------------|-------------|----------|
|
| 485 |
+
| `.pt` (TorchScript) | ✅ Yes - can fine-tune | PyTorch/LibTorch/C++ | Research, fine-tuning |
|
| 486 |
+
| `.onnx` (ONNX) | ❌ Inference only | Universal (C++, JS, Rust, etc.) | Production deployment |
|
| 487 |
+
|
| 488 |
+
---
|
| 489 |
+
|
| 490 |
+
### 📦 Files Created by `--unpack`
|
| 491 |
+
|
| 492 |
+
Spawns a complete deployment package:
|
| 493 |
+
|
| 494 |
+
```
|
| 495 |
+
output_dir/
|
| 496 |
+
├── README.md # This documentation
|
| 497 |
+
├── cocoon_tmrl_adapter.py # TrackMania 2020 adapter (if embedded)
|
| 498 |
+
├── cocoon_drone_adapter.py # Drone Warfare adapter (if embedded)
|
| 499 |
+
├── cocoon_drone_arena.py # Full 8-mode drone arena (if embedded)
|
| 500 |
+
├── jsbsim_quadcopter.py # NASA JSBSim 6-DOF physics (if embedded)
|
| 501 |
+
├── vocabulary.json # Token vocabulary
|
| 502 |
+
├── metadata.json # Export metadata + organism info
|
| 503 |
+
├── requirements.txt # Python dependencies
|
| 504 |
+
├── ensemble.onnx # ONNX model (all brains unified)
|
| 505 |
+
└── ensemble_weights.pt # PyTorch weights bundle
|
| 506 |
+
```
|
| 507 |
+
|
| 508 |
+
---
|
| 509 |
+
|
| 510 |
+
### 📦 Files Created by `--export-package`
|
| 511 |
+
|
| 512 |
+
Netron-viewable package with ONNX models and model card:
|
| 513 |
+
|
| 514 |
+
```
|
| 515 |
+
my_model/
|
| 516 |
+
├── brain_ensemble.onnx # Combined ONNX (all brains unified)
|
| 517 |
+
├── brain_*.onnx # Individual organism ONNX files
|
| 518 |
+
├── vocabulary.json # Token vocabulary
|
| 519 |
+
├── metadata.json # Full configuration + fitness + architecture
|
| 520 |
+
└── README.md # Model card documentation
|
| 521 |
+
```
|
| 522 |
+
|
| 523 |
+
*Note: To get the full cocoon.py + requirements.txt, use `--unpack` instead.*
|
| 524 |
+
|
| 525 |
+
---
|
| 526 |
+
|
| 527 |
+
### ⚙️ Global Options
|
| 528 |
+
|
| 529 |
+
These flags work with any mode:
|
| 530 |
+
|
| 531 |
+
| Flag | Default | Description |
|
| 532 |
+
|------|---------|-------------|
|
| 533 |
+
| `--voting MODE` | confidence | Ensemble voting: `majority`, `weighted`, `confidence` |
|
| 534 |
+
| `--max-organisms N` | all | Limit organisms loaded (saves VRAM) |
|
| 535 |
+
| `--verbose` / `-v` | off | Enable verbose debug logging |
|
| 536 |
+
| `--help` | - | Show all available options |
|
| 537 |
+
|
| 538 |
+
**Examples:**
|
| 539 |
+
```bash
|
| 540 |
+
python cocoon.py --mode chat --max-organisms 5 # Load only 5 organisms
|
| 541 |
+
python cocoon.py --mode gym --voting majority # Use majority voting
|
| 542 |
+
python cocoon.py --mode chat --verbose # Debug output
|
| 543 |
+
```
|
| 544 |
+
|
| 545 |
+
---
|
| 546 |
+
|
| 547 |
+
## 📡 API Reference
|
| 548 |
+
|
| 549 |
+
### CocoonAgent
|
| 550 |
+
|
| 551 |
+
```python
|
| 552 |
+
from cocoon import CocoonAgent
|
| 553 |
+
|
| 554 |
+
agent = CocoonAgent()
|
| 555 |
+
|
| 556 |
+
# Get action from state (returns action_idx, {outputs dict})
|
| 557 |
+
action, outputs = agent.get_action(state_vector)
|
| 558 |
+
# outputs = {'action_probs': [...], 'value': float, 'language_logits': [...], 'vp': float}
|
| 559 |
+
|
| 560 |
+
# Process text input (for chat mode)
|
| 561 |
+
response = agent.process_input("Hello there!")
|
| 562 |
+
|
| 563 |
+
# Access subsystems
|
| 564 |
+
agent.atomic_lang.get_atoms_by_emotion(min_valence=0.5) # Get positive atoms
|
| 565 |
+
agent.conversation_history.get_summary() # Get conversation stats
|
| 566 |
+
agent.knowledge_web.get_related("concept", min_strength=0.3) # Get related concepts
|
| 567 |
+
agent.vp_runtime.compute_from_state(state) # Get VP value
|
| 568 |
+
```
|
| 569 |
+
|
| 570 |
+
### HTTP Endpoints (--mode serve)
|
| 571 |
+
|
| 572 |
+
| Endpoint | Method | Description |
|
| 573 |
+
|----------|--------|-------------|
|
| 574 |
+
| `/health` | GET | Health check |
|
| 575 |
+
| `/infer` | POST | `{"state": [...]}` → action |
|
| 576 |
+
| `/chat` | POST | `{"message": "..."}` → response |
|
| 577 |
+
| `/info` | GET | Agent metadata |
|
| 578 |
+
|
| 579 |
+
---
|
| 580 |
+
|
| 581 |
+
## 🔧 Dependencies
|
| 582 |
+
|
| 583 |
+
Minimal requirements:
|
| 584 |
+
```
|
| 585 |
+
torch>=2.0
|
| 586 |
+
numpy
|
| 587 |
+
```
|
| 588 |
+
|
| 589 |
+
Optional for HTTP serving:
|
| 590 |
+
```
|
| 591 |
+
flask # or fastapi + uvicorn
|
| 592 |
+
```
|
| 593 |
+
|
| 594 |
+
Optional for Gymnasium:
|
| 595 |
+
```
|
| 596 |
+
gymnasium
|
| 597 |
+
```
|
| 598 |
+
|
| 599 |
+
---
|
| 600 |
+
|
| 601 |
+
## 📦 Re-Exporting
|
| 602 |
+
|
| 603 |
+
The cocoon can re-export its neural models:
|
| 604 |
+
|
| 605 |
+
```python
|
| 606 |
+
from cocoon import CocoonAgent
|
| 607 |
+
|
| 608 |
+
agent = CocoonAgent()
|
| 609 |
+
|
| 610 |
+
# Export to ONNX for deployment
|
| 611 |
+
agent.export_onnx("brain.onnx")
|
| 612 |
+
|
| 613 |
+
# Export to TorchScript for C++/LibTorch
|
| 614 |
+
agent.export_torchscript("brain.pt")
|
| 615 |
+
|
| 616 |
+
# Save updated weights after learning
|
| 617 |
+
torch.save(agent.brain.state_dict(), "updated_weights.pth")
|
| 618 |
+
```
|
| 619 |
+
|
| 620 |
+
---
|
| 621 |
+
|
| 622 |
+
## 🦋 About the Butterfly System
|
| 623 |
+
|
| 624 |
+
This cocoon was generated by the **Butterfly Convergence Engine** - a neuro-symbolic AI framework that combines:
|
| 625 |
+
|
| 626 |
+
- **Neural networks** for pattern recognition and action selection
|
| 627 |
+
- **Atomic language** for grounded semantic understanding
|
| 628 |
+
- **VP regulation** (Vigilance × Plasticity) for adaptive attention
|
| 629 |
+
- **Knowledge webs** for relational reasoning
|
| 630 |
+
- **Distributed ensembles** for robust decision-making
|
| 631 |
+
|
| 632 |
+
Learn more: [Convergence Engine on GitHub](https://github.com/Yufok1/Convergence_Engine)
|
| 633 |
+
|
| 634 |
+
---
|
| 635 |
+
|
| 636 |
+
*Generated by 🦋 Butterfly Agent Compiler*
|
UNPACK/cocoon_drone_adapter.py
ADDED
|
@@ -0,0 +1,712 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
🛸 COCOON DRONE ADAPTER - Fly Drones with Exported Butterfly Cocoons
|
| 4 |
+
|
| 5 |
+
This adapter bridges your exported cocoon organisms to the NASA JSBSim-grade
|
| 6 |
+
drone simulation arena. Your Highlander-trained warriors can now fly!
|
| 7 |
+
|
| 8 |
+
ALL 8 GAME MODES:
|
| 9 |
+
FREE_FLY - Basic flight training
|
| 10 |
+
FORMATION - Swarm coordination (team)
|
| 11 |
+
PURSUIT - Chase moving targets
|
| 12 |
+
TAG_BATTLE - Combat: tag enemies, avoid being tagged
|
| 13 |
+
ZONE_CONTROL - Control airspace zones
|
| 14 |
+
CAPTURE_FLAG - Team objective game
|
| 15 |
+
SURVIVAL - Last drone flying wins
|
| 16 |
+
ESCORT - Protect VIP drone
|
| 17 |
+
|
| 18 |
+
SETUP OPTIONS:
|
| 19 |
+
|
| 20 |
+
Option A - Same folder as cocoon.py:
|
| 21 |
+
your_export_folder/
|
| 22 |
+
├── cocoon.py ← Your exported agent
|
| 23 |
+
└── cocoon_drone_adapter.py ← This file
|
| 24 |
+
|
| 25 |
+
Option B - Import the cocoon directly:
|
| 26 |
+
from your_export_folder.cocoon import CocoonAgent
|
| 27 |
+
from cocoon_drone_adapter import fly_drones, DroneArenaRunner
|
| 28 |
+
|
| 29 |
+
USAGE:
|
| 30 |
+
python cocoon_drone_adapter.py # Interactive mode picker
|
| 31 |
+
python cocoon_drone_adapter.py --mode tag_battle # Specific mode
|
| 32 |
+
python cocoon_drone_adapter.py --mode survival --time 180 # 3 min survival
|
| 33 |
+
python cocoon_drone_adapter.py --all # Run all modes sequentially
|
| 34 |
+
python cocoon_drone_adapter.py --visual # With 3D visualization (requires PyFlyt)
|
| 35 |
+
|
| 36 |
+
REQUIREMENTS:
|
| 37 |
+
- numpy, torch (bundled in cocoon.py)
|
| 38 |
+
- matplotlib (for trajectory plots)
|
| 39 |
+
- PyFlyt (optional, for 3D visualization: pip install PyFlyt)
|
| 40 |
+
|
| 41 |
+
Author: The Butterfly System / Convergence Engine
|
| 42 |
+
"""
|
| 43 |
+
|
| 44 |
+
import sys
|
| 45 |
+
import os
|
| 46 |
+
import time
|
| 47 |
+
import argparse
|
| 48 |
+
import json
|
| 49 |
+
import numpy as np
|
| 50 |
+
from typing import Optional, Dict, Any, List, Tuple
|
| 51 |
+
from dataclasses import dataclass, field
|
| 52 |
+
from enum import Enum
|
| 53 |
+
|
| 54 |
+
# Fix Windows console encoding
|
| 55 |
+
if sys.platform == 'win32':
    # Best effort only: switch stdout to UTF-8 so the emoji status lines print.
    # stdout may have been replaced by an object without reconfigure(), or may
    # refuse the change; neither case should stop the module from importing.
    try:
        sys.stdout.reconfigure(encoding='utf-8')
    except Exception:
        pass
|
| 60 |
+
|
| 61 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 62 |
+
# IMPORTS - Try local cocoon first, then from package
|
| 63 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 64 |
+
|
| 65 |
+
COCOON_AVAILABLE = False
|
| 66 |
+
CocoonAgent = None
|
| 67 |
+
|
| 68 |
+
def _load_cocoon():
    """Locate a CocoonAgent implementation and publish it through module globals.

    Lookup order:
      1. ``cocoon.py`` in the same directory as this file.
      2. Any ``cocoon_ensemble_*.py`` in that directory, loaded by file path.
      3. ``reality_simulator.agent_compiler`` (development fallback).

    Side effects:
        Sets the globals ``COCOON_AVAILABLE`` and ``CocoonAgent``, may prepend
        this file's directory to ``sys.path`` (once), and prints a status line.

    Returns:
        bool: True if any of the three sources was found, False otherwise.
    """
    global COCOON_AVAILABLE, CocoonAgent

    current_dir = os.path.dirname(os.path.abspath(__file__))

    # Try 1: Local cocoon.py in same directory.
    # Only add the directory once; this function is called from several entry
    # points and would otherwise grow sys.path on every call.
    if current_dir not in sys.path:
        sys.path.insert(0, current_dir)
    try:
        from cocoon import CocoonAgent as CA
    except ImportError:
        pass
    else:
        CocoonAgent = CA
        COCOON_AVAILABLE = True
        print("✅ Loaded cocoon from local cocoon.py")
        return True

    # Try 2: Find any cocoon_ensemble_*.py (sorted so the pick is deterministic)
    import importlib.util
    for filename in sorted(os.listdir(current_dir)):
        if not (filename.startswith('cocoon_ensemble_') and filename.endswith('.py')):
            continue
        try:
            spec = importlib.util.spec_from_file_location(
                filename[:-3], os.path.join(current_dir, filename)
            )
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            agent_cls = module.CocoonAgent
        except Exception as e:
            # A broken candidate should not hide the others, but say why it was
            # skipped instead of swallowing the error silently.
            print(f"⚠️ Failed to load {filename}: {e}")
            continue
        CocoonAgent = agent_cls
        COCOON_AVAILABLE = True
        print(f"✅ Loaded cocoon from {filename}")
        return True

    # Try 3: From reality_simulator (development mode)
    # NOTE(review): this branch only flags availability; CocoonAgent stays None,
    # so callers must not call CocoonAgent() when COCOON_AVAILABLE == "compile".
    try:
        from reality_simulator.agent_compiler import compile_cocoon_agent
        print("⚠️ No cocoon.py found - will use compile_cocoon_agent for development")
        COCOON_AVAILABLE = "compile"
        return True
    except ImportError:
        pass

    print("❌ No cocoon found. Export one first with: python butterfly_system.py --export")
    return False
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 114 |
+
# DRONE ARENA INTEGRATION
|
| 115 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 116 |
+
|
| 117 |
+
# Try to import drone arena from various locations
|
| 118 |
+
ARENA_AVAILABLE = False
|
| 119 |
+
JSBSIM_PHYSICS_AVAILABLE = False
|
| 120 |
+
|
| 121 |
+
def _try_local_arena_import():
    """Load ``cocoon_drone_arena.py`` from this file's directory, if present.

    On success the arena classes are copied into this module's globals and the
    ``ARENA_AVAILABLE`` / ``JSBSIM_PHYSICS_AVAILABLE`` flags are updated.

    Returns:
        bool: True when the local arena module was loaded, False when the file
        is missing or loading it raised.
    """
    global ARENA_AVAILABLE, JSBSIM_PHYSICS_AVAILABLE

    here = os.path.dirname(os.path.abspath(__file__))
    candidate = os.path.join(here, 'cocoon_drone_arena.py')
    if not os.path.exists(candidate):
        return False

    try:
        import importlib.util
        spec = importlib.util.spec_from_file_location('cocoon_drone_arena', candidate)
        arena_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(arena_module)

        # Re-export the arena API under the names the rest of this file uses.
        for symbol in ('CocoonDroneArena', 'DroneArenaConfig', 'DroneGameMode',
                       'DronePhysics', 'DroneState', 'GameState'):
            globals()[symbol] = getattr(arena_module, symbol)

        JSBSIM_PHYSICS_AVAILABLE = getattr(arena_module, 'JSBSIM_PHYSICS_AVAILABLE', False)
        ARENA_AVAILABLE = True
        print("✅ Loaded drone arena from local cocoon_drone_arena.py")
        return True
    except Exception as e:
        print(f"⚠️ Failed to load local arena: {e}")
    return False
|
| 148 |
+
|
| 149 |
+
def _try_package_arena_import():
    """Import the drone arena from the ``reality_simulator`` package.

    Two attempts are made: first with the current ``sys.path``, then again
    after prepending the parent of this file's directory. On success the arena
    classes are copied into this module's globals and the availability flags
    are updated.

    Returns:
        bool: True if either attempt imported the arena, False otherwise.
    """
    global ARENA_AVAILABLE, JSBSIM_PHYSICS_AVAILABLE

    for use_parent_dir in (False, True):
        try:
            if use_parent_dir:
                # Second attempt: look one directory up from this file.
                sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
            from reality_simulator.arena.cocoon_drone_arena import (
                CocoonDroneArena, DroneArenaConfig, DroneGameMode,
                DronePhysics, DroneState, GameState, JSBSIM_PHYSICS_AVAILABLE as JSB
            )
        except ImportError:
            continue

        exported = (
            ('CocoonDroneArena', CocoonDroneArena),
            ('DroneArenaConfig', DroneArenaConfig),
            ('DroneGameMode', DroneGameMode),
            ('DronePhysics', DronePhysics),
            ('DroneState', DroneState),
            ('GameState', GameState),
        )
        for symbol, value in exported:
            globals()[symbol] = value
        JSBSIM_PHYSICS_AVAILABLE = JSB
        ARENA_AVAILABLE = True
        if not use_parent_dir:
            # Only the direct package import announces itself.
            print("✅ Loaded drone arena from reality_simulator package")
        return True

    return False
|
| 190 |
+
|
| 191 |
+
# Try local first (standalone mode from --unpack), then package
|
| 192 |
+
# Prefer the unpacked local arena; fall back to the installed package.
# `or` short-circuits, so the package import only runs when the local one fails.
if not (_try_local_arena_import() or _try_package_arena_import()):
    print("⚠️ Drone arena not available - running in standalone mode")
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
# Visualization backends
|
| 198 |
+
MATPLOTLIB_AVAILABLE = False
|
| 199 |
+
PYFLYT_AVAILABLE = False
|
| 200 |
+
|
| 201 |
+
try:
|
| 202 |
+
import matplotlib.pyplot as plt
|
| 203 |
+
from mpl_toolkits.mplot3d import Axes3D
|
| 204 |
+
MATPLOTLIB_AVAILABLE = True
|
| 205 |
+
except ImportError:
|
| 206 |
+
pass
|
| 207 |
+
|
| 208 |
+
try:
|
| 209 |
+
import gymnasium
|
| 210 |
+
import PyFlyt.gym_envs
|
| 211 |
+
PYFLYT_AVAILABLE = True
|
| 212 |
+
except ImportError:
|
| 213 |
+
pass
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 217 |
+
# GAME MODE DEFINITIONS
|
| 218 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 219 |
+
|
| 220 |
+
# Registry of the playable modes, keyed by the CLI/mode name.
# Each row: (key, display name, description, emoji, team game?, default seconds)
GAME_MODES = {
    key: {
        'name': name,
        'description': description,
        'emoji': emoji,
        'team_game': team_game,
        'default_time': default_time,
    }
    for key, name, description, emoji, team_game, default_time in (
        ('free_fly', 'Free Fly',
         'Basic flight training - learn to hover, maneuver, land', '🕊️', False, 60),
        ('formation', 'Formation',
         'Maintain swarm formation - team coordination', '🔷', True, 90),
        ('pursuit', 'Pursuit',
         'Chase and intercept moving targets', '🎯', False, 60),
        ('tag_battle', 'Tag Battle',
         'Combat: tag enemies, evade being tagged', '⚔️', True, 120),
        ('zone_control', 'Zone Control',
         'Control airspace zones - team territory', '🏰', True, 120),
        ('capture_flag', 'Capture the Flag',
         'Team objective - capture enemy flag', '🚩', True, 180),
        ('survival', 'Survival',
         'Last drone flying wins - free for all', '💀', False, 180),
        ('escort', 'Escort',
         'Protect VIP drone from enemies', '🛡️', True, 120),
    )
}
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 281 |
+
# DRONE ARENA RUNNER
|
| 282 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 283 |
+
|
| 284 |
+
@dataclass
class DroneRunResult:
    """Outcome of one drone arena session, as produced by DroneArenaRunner.run_mode."""

    # --- always supplied by the runner ---
    mode: str            # game mode name the session was run with
    duration: float      # wall-clock seconds the run took
    total_steps: int     # number of arena steps executed

    # --- outcome counters (each 0 or 1 for a single session) ---
    blue_wins: int = 0
    red_wins: int = 0
    draws: int = 0

    # --- aggregates ---
    total_reward: float = 0.0   # sum of all per-drone rewards over all steps
    survivors: int = 0          # drones still alive when the run ended

    # --- recordings; a fresh container is created per instance ---
    trajectories: Dict[str, List[np.ndarray]] = field(default_factory=dict)  # keyed by drone name
    events: List[Dict[str, Any]] = field(default_factory=list)
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
class DroneArenaRunner:
    """
    Runs cocoon organisms in the drone arena.

    Handles:
    - Arena setup for each game mode
    - Cocoon-to-drone action mapping
    - Trajectory recording for visualization
    - Results aggregation
    """

    # Simulated seconds between two progress lines printed by run_mode().
    _PROGRESS_INTERVAL_S = 10.0

    def __init__(self, cocoon_agent, num_drones: int = 8, visualize: bool = False):
        """
        Args:
            cocoon_agent: Loaded CocoonAgent instance
            num_drones: Number of drones (splits into 2 teams for team games)
            visualize: Enable real-time 3D visualization (ignored without PyFlyt)

        Side effects:
            Sets ``cocoon_agent.epsilon`` to 0.5 when the agent has that attribute.
        """
        self.cocoon = cocoon_agent
        self.num_drones = num_drones
        self.visualize = visualize and PYFLYT_AVAILABLE

        # Boost exploration for drone mode - cocoon needs to learn new domain
        if hasattr(cocoon_agent, 'epsilon'):
            cocoon_agent.epsilon = 0.5  # 50% random exploration to try different actions
            print(f" 🎲 Epsilon boosted to 0.5 for drone exploration")

        # Ensure cocoon has enough organisms
        if hasattr(cocoon_agent, 'brains'):
            available = len(cocoon_agent.brains)
            if available < num_drones:
                print(f"⚠️ Cocoon has {available} organisms, requested {num_drones}. Using {available}.")
                self.num_drones = available

        # Default config - using 10 FPS for faster simulation
        # (ensemble voting takes ~20ms per drone, so 60 FPS is too slow)
        self.config = DroneArenaConfig(
            arena_size=100.0,
            max_episode_steps=500,  # ~50 seconds at 10 FPS
            target_fps=10,  # Reduced from 60 - ensemble inference is slow
        ) if ARENA_AVAILABLE else None

        print(f"🛸 DroneArenaRunner initialized")
        print(f" Organisms: {self.num_drones}")
        print(f" Physics: {'NASA JSBSim' if JSBSIM_PHYSICS_AVAILABLE else 'Simplified'}")
        print(f" Visualization: {'PyFlyt 3D' if self.visualize else 'Matplotlib trajectories'}")

    @staticmethod
    def _team_for(drone_name, total_drones, team_game):
        """Guess the team of a drone from its ``org_<index>`` name.

        Returns 'blue' or 'red', or None when the name cannot be parsed.

        NOTE(review): assumes a "half" split puts the lower-indexed half on
        blue (matches the previous hard-coded org_0..org_3 rule for 8 drones);
        confirm against CocoonDroneArena.
        """
        if not team_game:
            # Non-team modes are created with team_split="all_blue".
            return 'blue'
        prefix, _, suffix = str(drone_name).rpartition('_')
        if prefix != 'org' or not suffix.isdecimal():
            return None
        # Compare numerically: a string compare would sort 'org_10' before 'org_3'.
        return 'blue' if int(suffix) < total_drones // 2 else 'red'

    def run_mode(self, mode: str, duration_seconds: float = None,
                 record_trajectories: bool = True) -> "DroneRunResult":
        """
        Run a specific game mode for a duration.

        Args:
            mode: Game mode name (e.g., 'tag_battle', 'survival'); case-insensitive
            duration_seconds: How long to run (uses mode default if None or 0)
            record_trajectories: Record drone positions for plotting

        Returns:
            DroneRunResult with statistics. When the arena is unavailable or the
            mode is unknown, a result with zero duration and zero steps.
        """
        if not ARENA_AVAILABLE:
            print(f"❌ Arena not available - cannot run {mode}")
            return DroneRunResult(mode=mode, duration=0, total_steps=0)

        mode_key = mode.lower()
        mode_info = GAME_MODES.get(mode_key)
        if not mode_info:
            print(f"❌ Unknown mode: {mode}")
            return DroneRunResult(mode=mode, duration=0, total_steps=0)

        duration = duration_seconds or mode_info['default_time']

        print(f"\n{'='*60}")
        print(f"{mode_info['emoji']} {mode_info['name'].upper()}")
        print(f"{'='*60}")
        print(f"Description: {mode_info['description']}")
        print(f"Duration: {duration}s | Team game: {mode_info['team_game']}")
        print()

        # Map mode name to enum
        mode_enum = DroneGameMode[mode_key.upper()]

        # Create arena
        arena = CocoonDroneArena(
            cocoon=self.cocoon,
            mode=mode_enum,
            config=self.config,
            team_split="half" if mode_info['team_game'] else "all_blue",
            visualize=self.visualize,
            verbose=False,  # Less verbose for cleaner output
            enable_training=True,  # Let cocoon learn from drone experience!
            train_interval=10  # Train every 10 steps
        )

        # Run simulation
        start_time = time.time()
        fps = self.config.target_fps
        target_steps = int(duration * fps)
        # Derive the progress cadence from the configured frame rate so the
        # "every 10 seconds" intent holds whatever target_fps is.
        progress_every = max(1, int(round(self._PROGRESS_INTERVAL_S * fps)))

        trajectories = {name: [] for name in arena.drones.keys()}
        events = []
        total_reward = 0.0
        step = 0

        print(f"Running {target_steps} steps ({duration}s at {fps} FPS)...")
        print()

        try:
            while step < target_steps and not arena.game_state.finished:
                # Step physics
                rewards = arena.step()
                total_reward += sum(rewards.values())

                # Record trajectories (every 10th frame, live drones only)
                if record_trajectories and step % 10 == 0:
                    for name, drone in arena.drones.items():
                        if drone.alive:
                            trajectories[name].append(drone.position.copy())

                # Progress display
                if step % progress_every == 0:
                    elapsed = time.time() - start_time
                    blue = arena.game_state.blue_alive
                    red = arena.game_state.red_alive
                    print(f" [{elapsed:5.1f}s] Step {step:5d} | "
                          f"Blue: {blue} | Red: {red} | "
                          f"Reward: {total_reward:.1f}")

                step += 1

        except KeyboardInterrupt:
            # Keep whatever was collected so far and still report results.
            print("\n⏹️ Interrupted by user")

        elapsed = time.time() - start_time

        # Determine winner
        gs = arena.game_state
        blue_wins = 1 if gs.winner == "blue" else 0
        red_wins = 1 if gs.winner == "red" else 0
        draws = 1 if gs.winner == "draw" or gs.winner is None else 0
        survivors = sum(1 for d in arena.drones.values() if d.alive)

        # Convert trajectories to arrays; empty tracks keep an (0, 3) shape so
        # downstream column indexing does not fail.
        traj_arrays = {
            name: np.array(pts) if pts else np.array([]).reshape(0, 3)
            for name, pts in trajectories.items()
        }

        result = DroneRunResult(
            mode=mode_key,
            duration=elapsed,
            total_steps=step,
            blue_wins=blue_wins,
            red_wins=red_wins,
            draws=draws,
            total_reward=total_reward,
            survivors=survivors,
            trajectories=traj_arrays,
            events=events
        )

        # Print summary
        print()
        print(f"{'='*60}")
        print(f"RESULTS: {mode_info['name']}")
        print(f"{'='*60}")
        print(f" Duration: {elapsed:.1f}s ({step} steps)")
        print(f" Survivors: {survivors}/{self.num_drones}")
        print(f" Total Reward: {total_reward:.2f}")
        if mode_info['team_game']:
            print(f" Winner: {gs.winner or 'None (ongoing)'}")
            print(f" Blue alive: {gs.blue_alive} | Red alive: {gs.red_alive}")

        return result

    def run_all_modes(self, duration_per_mode: float = 60) -> List["DroneRunResult"]:
        """Run every mode in GAME_MODES sequentially and print a summary table.

        Args:
            duration_per_mode: Seconds to run each mode.

        Returns:
            One DroneRunResult per mode, in GAME_MODES order.
        """
        results = []

        print("\n" + "="*60)
        print("🛸 RUNNING ALL DRONE GAME MODES")
        print("="*60)

        for mode_key in GAME_MODES:
            result = self.run_mode(mode_key, duration_seconds=duration_per_mode)
            results.append(result)
            print()

        # Summary
        print("\n" + "="*60)
        print("📊 ALL MODES SUMMARY")
        print("="*60)

        for r in results:
            mode_info = GAME_MODES[r.mode]
            status = "✅" if r.total_steps > 0 else "❌"
            print(f" {status} {mode_info['emoji']} {mode_info['name']:15} | "
                  f"{r.duration:5.1f}s | Survivors: {r.survivors} | Reward: {r.total_reward:.1f}")

        return results

    def plot_trajectories(self, result: "DroneRunResult", save_path: str = None):
        """Plot drone trajectories from a run as a 3D line chart.

        Args:
            result: Run result whose ``trajectories`` hold (N, 3) position arrays.
            save_path: If given, the figure is written there and closed;
                otherwise it is shown interactively.
        """
        if not MATPLOTLIB_AVAILABLE:
            print("❌ Matplotlib not available for plotting")
            return

        fig = plt.figure(figsize=(12, 9))
        ax = fig.add_subplot(111, projection='3d')

        mode_info = GAME_MODES.get(str(result.mode).lower(), {})
        # Unknown modes keep the two-colour behaviour.
        team_game = mode_info.get('team_game', True)
        total_drones = len(result.trajectories)

        colors = {'blue': 'blue', 'red': 'red'}
        plotted_any = False

        for drone_name, trajectory in result.trajectories.items():
            if len(trajectory) == 0:
                continue

            # Determine team color (gray when the name gives no team hint)
            team = self._team_for(drone_name, total_drones, team_game)
            color = colors.get(team, 'gray')

            ax.plot(trajectory[:, 0], trajectory[:, 1], trajectory[:, 2],
                    color=color, alpha=0.7, linewidth=1.5, label=drone_name)

            # Start/end markers
            ax.scatter(*trajectory[0], color='green', s=50, marker='o')
            ax.scatter(*trajectory[-1], color=color, s=50, marker='x')
            plotted_any = True

        ax.set_xlabel('X (m)')
        ax.set_ylabel('Y (m)')
        ax.set_zlabel('Altitude (m)')
        ax.set_title(f"{mode_info.get('emoji', '🛸')} {mode_info.get('name', result.mode)} - "
                     f"Drone Trajectories ({result.duration:.0f}s)")

        # The per-line labels are only visible through a legend.
        if plotted_any:
            ax.legend(loc='upper right', fontsize='small')

        # Ground plane
        arena_half = self.config.arena_size / 2 if self.config else 50
        xx, yy = np.meshgrid(
            np.linspace(-arena_half, arena_half, 10),
            np.linspace(-arena_half, arena_half, 10)
        )
        ax.plot_surface(xx, yy, np.zeros_like(xx), alpha=0.1, color='green')

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=150)
            # Release the figure; pyplot keeps it alive otherwise and repeated
            # saves would accumulate open figures.
            plt.close(fig)
            print(f"📊 Saved trajectory plot: {save_path}")
        else:
            plt.show()
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 551 |
+
# MAIN ENTRY POINTS
|
| 552 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 553 |
+
|
| 554 |
+
def fly_drones(cocoon_agent=None, mode: str = 'tag_battle',
               duration: float = None, visualize: bool = False,
               num_drones: int = 8, plot: bool = True) -> DroneRunResult:
    """
    Convenient function to fly drones with a cocoon.

    Args:
        cocoon_agent: CocoonAgent instance (loads from cocoon.py if None)
        mode: Game mode to run
        duration: Duration in seconds (uses mode default if None)
        visualize: Enable 3D visualization
        num_drones: Number of drones
        plot: Show trajectory plot after

    Returns:
        DroneRunResult

    Raises:
        RuntimeError: If no agent was passed and no CocoonAgent class could be
            loaded.
    """
    # Load cocoon if needed
    if cocoon_agent is None:
        _load_cocoon()
        # COCOON_AVAILABLE can be truthy ("compile") while CocoonAgent is still
        # None; calling None would surface as an opaque TypeError.
        if not COCOON_AVAILABLE or CocoonAgent is None:
            raise RuntimeError("No cocoon available")
        cocoon_agent = CocoonAgent()

    runner = DroneArenaRunner(cocoon_agent, num_drones=num_drones, visualize=visualize)
    result = runner.run_mode(mode, duration_seconds=duration)

    if plot and MATPLOTLIB_AVAILABLE:
        runner.plot_trajectories(result)

    return result
|
| 585 |
+
|
| 586 |
+
|
| 587 |
+
def interactive_mode():
    """Interactive mode picker.

    Prints the mode menu, reads a selection (and optionally a duration) from
    stdin, then runs the chosen mode or all modes. Returns None in every case;
    problems are reported on stdout and not raised.
    """
    _load_cocoon()

    # CocoonAgent stays None in the "compile" fallback, so check it explicitly.
    if not COCOON_AVAILABLE or CocoonAgent is None:
        print("\n❌ No cocoon found. Options:")
        print(" 1. Export a cocoon: python butterfly_system.py --export")
        print(" 2. Put cocoon.py in this folder")
        return

    print("\n" + "="*60)
    print("🛸 COCOON DRONE ARENA - Mode Selection")
    print("="*60)
    print()

    mode_keys = list(GAME_MODES)
    for i, key in enumerate(mode_keys, 1):
        info = GAME_MODES[key]
        print(f" {i}. {info['emoji']} {info['name']:15} - {info['description']}")

    print()
    print(" 9. Run ALL modes (60s each)")
    print(" 0. Exit")
    print()

    try:
        choice = input("Select mode (1-8, 9=all, 0=exit): ").strip()

        if choice == '0':
            return

        if choice == '9':
            cocoon = CocoonAgent()
            runner = DroneArenaRunner(cocoon)
            runner.run_all_modes(duration_per_mode=60)
            return

        # Anything that is not a menu number is an invalid selection, not an
        # internal error.
        try:
            mode_idx = int(choice) - 1
        except ValueError:
            print("Invalid selection")
            return

        if not 0 <= mode_idx < len(mode_keys):
            print("Invalid selection")
            return

        mode_key = mode_keys[mode_idx]
        default_time = GAME_MODES[mode_key]['default_time']

        raw_duration = input(f"Duration in seconds [{default_time}]: ").strip()
        try:
            duration = int(raw_duration) if raw_duration else default_time
        except ValueError:
            print(f"Invalid duration '{raw_duration}' - using {default_time}s")
            duration = default_time

        cocoon = CocoonAgent()
        fly_drones(cocoon, mode=mode_key, duration=duration)

    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or closed stdin at a prompt: leave quietly.
        print("\n👋 Goodbye!")
    except Exception as e:
        # Top-level boundary of the interactive session: report, don't crash.
        print(f"Error: {e}")
|
| 640 |
+
|
| 641 |
+
|
| 642 |
+
def main():
    """Command-line entry point.

    Parses the CLI options, prints which optional backends are available, then
    runs all modes (``--all``), one mode (``--mode``), or the interactive
    picker when neither is given.
    """
    parser = argparse.ArgumentParser(
        description="🛸 Fly drones with your exported cocoon",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python cocoon_drone_adapter.py # Interactive mode
  python cocoon_drone_adapter.py --mode survival # Run survival mode
  python cocoon_drone_adapter.py --mode tag_battle --time 180 # 3 min battle
  python cocoon_drone_adapter.py --all # All modes, 60s each
  python cocoon_drone_adapter.py --all --time 180 # All modes, 3 min each
"""
    )

    parser.add_argument('--mode', '-m', type=str,
                        choices=list(GAME_MODES.keys()),
                        help='Game mode to run')
    parser.add_argument('--time', '-t', type=int, default=None,
                        help='Duration in seconds')
    parser.add_argument('--all', '-a', action='store_true',
                        help='Run all game modes')
    parser.add_argument('--visual', '-v', action='store_true',
                        help='Enable 3D visualization (requires PyFlyt)')
    parser.add_argument('--drones', '-d', type=int, default=8,
                        help='Number of drones (default: 8)')
    parser.add_argument('--no-plot', action='store_true',
                        help='Skip trajectory plot')
    parser.add_argument('--save-plot', type=str, default=None,
                        help='Save trajectory plot to file')

    args = parser.parse_args()

    # Check dependencies
    print("🛸 COCOON DRONE ADAPTER")
    print("="*60)
    print(f"Arena: {'✅' if ARENA_AVAILABLE else '❌'}")
    print(f"JSBSim Physics: {'✅' if JSBSIM_PHYSICS_AVAILABLE else '⚠️ (using fallback)'}")
    print(f"Matplotlib: {'✅' if MATPLOTLIB_AVAILABLE else '❌'}")
    print(f"PyFlyt 3D: {'✅' if PYFLYT_AVAILABLE else '❌ (pip install PyFlyt)'}")

    if not (args.all or args.mode):
        # Interactive mode
        interactive_mode()
        return

    # Both batch paths need a loaded agent. CocoonAgent can still be None when
    # only the development "compile" fallback was found.
    _load_cocoon()
    if not COCOON_AVAILABLE or CocoonAgent is None:
        print("❌ No cocoon available")
        return

    cocoon = CocoonAgent()
    runner = DroneArenaRunner(cocoon, num_drones=args.drones, visualize=args.visual)

    if args.all:
        # Run all modes (--all takes precedence over --mode)
        runner.run_all_modes(duration_per_mode=args.time or 60)
        return

    # Run specific mode
    result = runner.run_mode(args.mode, duration_seconds=args.time)

    if not args.no_plot and MATPLOTLIB_AVAILABLE:
        runner.plot_trajectories(result, save_path=args.save_plot)
|
| 709 |
+
|
| 710 |
+
|
| 711 |
+
if __name__ == "__main__":
|
| 712 |
+
main()
|
UNPACK/cocoon_drone_arena.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
UNPACK/cocoon_tmrl_adapter.py
ADDED
|
@@ -0,0 +1,1724 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
🏎️ COCOON TMRL ADAPTER - Drive TrackMania with Exported Butterfly Cocoons
|
| 4 |
+
|
| 5 |
+
This adapter bridges your exported cocoon organisms to TMRL (TrackMania RL).
|
| 6 |
+
Your Highlander-trained warriors can now race in TrackMania 2020!
|
| 7 |
+
|
| 8 |
+
SETUP OPTIONS:
|
| 9 |
+
|
| 10 |
+
Option A - Same folder as cocoon.py:
|
| 11 |
+
your_export_folder/
|
| 12 |
+
├── cocoon.py ← Your exported agent
|
| 13 |
+
└── cocoon_tmrl_adapter.py ← This file
|
| 14 |
+
|
| 15 |
+
Option B - Import the cocoon directly:
|
| 16 |
+
from your_export_folder.cocoon import CocoonAgent
|
| 17 |
+
from cocoon_tmrl_adapter import CocoonActorModule, drive_trackmania
|
| 18 |
+
|
| 19 |
+
Option C - Standalone cocoon.py (single file export):
|
| 20 |
+
# Rename your cocoon_ensemble_*.py to cocoon.py, put in same folder
|
| 21 |
+
# OR pass the agent directly:
|
| 22 |
+
agent = CocoonAgent() # Load your cocoon however you want
|
| 23 |
+
drive_trackmania(cocoon_agent=agent)
|
| 24 |
+
|
| 25 |
+
USAGE:
|
| 26 |
+
python cocoon_tmrl_adapter.py # Interactive mode
|
| 27 |
+
python cocoon_tmrl_adapter.py --drive # Start driving in TrackMania
|
| 28 |
+
python cocoon_tmrl_adapter.py --organism 3 # Use specific organism brain
|
| 29 |
+
|
| 30 |
+
REQUIREMENTS:
|
| 31 |
+
- tmrl (pip install tmrl)
|
| 32 |
+
- TrackMania 2020 (with OpenPlanet plugin for TMRL)
|
| 33 |
+
- Your exported cocoon.py (in same folder OR passed as argument)
|
| 34 |
+
|
| 35 |
+
Author: The Butterfly System / Convergence Engine
|
| 36 |
+
"""
|
| 37 |
+
|
| 38 |
+
import sys
|
| 39 |
+
import os
|
| 40 |
+
import threading
|
| 41 |
+
import queue
|
| 42 |
+
|
| 43 |
+
# Fix Windows console encoding for emojis
|
| 44 |
+
if sys.platform == 'win32':
    # Fix Windows console encoding for emojis: the status messages printed by
    # this script contain emoji, so ask stdout to encode as UTF-8.
    try:
        sys.stdout.reconfigure(encoding='utf-8')
    except (AttributeError, OSError, ValueError):
        # AttributeError: Python < 3.7, or stdout is None / a replacement
        # stream without reconfigure(). OSError / ValueError: the stream
        # refuses to be reconfigured. This is best effort only, so keep the
        # default encoding -- but no longer swallow KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` did.
        pass
|
| 49 |
+
|
| 50 |
+
import numpy as np
|
| 51 |
+
import torch
|
| 52 |
+
from typing import Optional, List, Dict, Any
|
| 53 |
+
from dataclasses import dataclass
|
| 54 |
+
|
| 55 |
+
# TMRL imports - lazy load to avoid import chain issues
|
| 56 |
+
# Placeholder values for the TMRL handles; _lazy_load_tmrl() rebinds them
# once the real package has been imported.
TMRL_AVAILABLE = False
RolloutWorker = GenericGymEnv = partial = cfg = None


class _StubActorModule:
    """Placeholder bound to ``ActorModule`` until the real TMRL class is loaded."""


# Stand-in so the name exists at class-definition time (replaced when TMRL loads)
ActorModule = _StubActorModule
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _json_default(obj):
|
| 71 |
+
"""Fallback serializer for numpy / torch objects when exporting cocoons."""
|
| 72 |
+
import numpy as _np
|
| 73 |
+
import torch as _torch
|
| 74 |
+
if isinstance(obj, (_np.integer,)):
|
| 75 |
+
return int(obj)
|
| 76 |
+
if isinstance(obj, (_np.floating,)):
|
| 77 |
+
return float(obj)
|
| 78 |
+
if isinstance(obj, _np.ndarray):
|
| 79 |
+
return obj.tolist()
|
| 80 |
+
if isinstance(obj, _torch.Tensor):
|
| 81 |
+
return obj.detach().cpu().tolist()
|
| 82 |
+
if isinstance(obj, set):
|
| 83 |
+
return list(obj)
|
| 84 |
+
if hasattr(obj, '__dict__'):
|
| 85 |
+
return obj.__dict__
|
| 86 |
+
raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _ensure_json_default(module):
|
| 90 |
+
"""Make sure the cocoon module exposes _json_default for export routines."""
|
| 91 |
+
if module is None:
|
| 92 |
+
return
|
| 93 |
+
if not hasattr(module, '_json_default'):
|
| 94 |
+
setattr(module, '_json_default', _json_default)
|
| 95 |
+
|
| 96 |
+
def _lazy_load_tmrl():
    """Import TMRL on first use and publish its handles as module globals.

    Returns:
        True when TMRL is (or already was) loaded, False when importing it
        failed. A missing package prints an install hint followed by the
        basic setup instructions; any other failure prints the error.
    """
    global TMRL_AVAILABLE, ActorModule, RolloutWorker, GenericGymEnv, partial, cfg

    if not TMRL_AVAILABLE:
        try:
            # Only the pieces needed for local driving are imported;
            # tmrl.networking (heavy crypto deps) and tmrl.envs are skipped.
            from tmrl.actor import ActorModule as _actor_cls
            from functools import partial as _partial_fn
            import tmrl.config.config_constants as _tmrl_constants
        except ImportError:
            print("[!] TMRL not installed in this Python environment.")
            print(" Install with: python -m pip install tmrl")
            _print_basic_setup_instructions()
            return False
        except Exception as e:
            print(f"[!] TMRL import error: {e}")
            return False

        ActorModule = _actor_cls
        RolloutWorker = None  # Not needed for local driving
        GenericGymEnv = None  # Not needed for local driving
        partial = _partial_fn
        cfg = _tmrl_constants
        TMRL_AVAILABLE = True

    return True
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def _check_openplanet_ready(timeout_s: float = 2.0) -> bool:
    """Report whether the OpenPlanet client currently delivers game data.

    Args:
        timeout_s: Timeout, in seconds, passed to the client's
            ``retrieve_data`` call.

    Returns:
        True only when TMRL is loaded and the client hands back non-empty
        data; False in every other case, including any exception.
    """
    if not TMRL_AVAILABLE:
        return False

    try:
        import time as _time
        import threading as _threading
        from tmrl.custom.tm.utils.tools import TM2020OpenPlanetClient

        # The client runs a background thread. If OpenPlanet is not running,
        # that thread can die with ConnectionRefusedError and spam the
        # console, so that one exception type is muted for the duration of
        # the probe.
        previous_hook = getattr(_threading, 'excepthook', None)
        hook_swappable = previous_hook is not None

        def _muting_excepthook(hook_args):
            refused = isinstance(hook_args.exc_value, ConnectionRefusedError)
            if not refused and previous_hook is not None:
                previous_hook(hook_args)

        if hook_swappable:
            _threading.excepthook = _muting_excepthook

        try:
            probe = TM2020OpenPlanetClient()
            _time.sleep(0.5)
            payload = probe.retrieve_data(timeout=float(timeout_s))
            if not payload:
                return False
            return len(payload) > 0
        finally:
            # Always put the original hook back, whatever the probe did.
            if hook_swappable:
                _threading.excepthook = previous_hook
    except Exception:
        return False
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def _doctor(cocoon_path: Optional[str] = None) -> int:
    """Beginner-friendly diagnostic that does NOT launch TrackMania.

    Prints interpreter / OS details and then checks, in order: the cocoon
    (an explicit file, or auto-detection when no path is given), the TMRL
    import, the OpenPlanet data stream, and -- for an explicitly loaded
    cocoon -- whether ``CocoonAgent`` can be instantiated.

    Args:
        cocoon_path: Optional path to an exported cocoon ``.py`` file.

    Returns:
        0 when the run reaches the end (warnings may still have been printed),
        2 when the cocoon file is missing or cannot be imported,
        3 when TMRL cannot be imported.
    """
    import platform
    import glob
    import importlib.util

    print("\nDOCTOR MODE (no TrackMania launch)")
    print("=" * 50)
    print(f"Python: {sys.version.splitlines()[0]}")
    print(f"OS: {platform.platform()}")
    print(f"CWD: {os.getcwd()}")
    print()

    module = None
    if cocoon_path:
        cocoon_path = os.path.abspath(cocoon_path)
        cocoon_dir = os.path.dirname(cocoon_path)
        print(f"Cocoon path: {cocoon_path}")
        if not os.path.isfile(cocoon_path):
            print("❌ Cocoon file not found.")
            # Help with typos: list cocoon-looking files next to the given path.
            if os.path.isdir(cocoon_dir):
                candidates = sorted(glob.glob(os.path.join(cocoon_dir, "cocoon_*.py")))
                if candidates:
                    print(" Found these nearby:")
                    for c in candidates[:10]:
                        print(f" - {os.path.basename(c)}")
            print()
            _print_basic_setup_instructions()
            return 2

        mod_name = "_cocoon_from_path"
        try:
            print("⏳ Loading cocoon module from file...")
            spec = importlib.util.spec_from_file_location(mod_name, cocoon_path)
            if spec is None or spec.loader is None:
                raise RuntimeError("Could not create import spec")
            module = importlib.util.module_from_spec(spec)
            # Register before executing so code inside the cocoon that looks
            # itself up through sys.modules works during import.
            sys.modules[mod_name] = module
            spec.loader.exec_module(module)
            _ensure_json_default(module)
            if not hasattr(module, 'CocoonAgent'):
                raise AttributeError("CocoonAgent not found in the cocoon module")
            print("✅ Cocoon module imported")
        except Exception as e:
            # Do not leave a half-initialised or unusable module registered.
            sys.modules.pop(mod_name, None)
            print(f"❌ Cocoon import failed: {e}")
            import traceback
            traceback.print_exc()
            return 2
    else:
        _try_load_cocoon(quiet=True, scan_exports=True)
        if COCOON_AVAILABLE:
            print("✅ Cocoon auto-detected in current folder")
        else:
            print("⚠️ No cocoon auto-detected in current folder")

    print("\n⏳ Checking TMRL...")
    if not _lazy_load_tmrl():
        print("❌ TMRL not ready")
        return 3
    print("✅ TMRL import OK")

    print("\n⏳ Checking OpenPlanet data stream...")
    if _check_openplanet_ready():
        print("✅ OpenPlanet is streaming data (you appear to be on a track)")
    else:
        print("⚠️ No OpenPlanet data detected.")
        print(" Common fixes:")
        print(" - Launch TrackMania 2020")
        print(" - Start a track (not the main menus)")
        print(" - In OpenPlanet: F3 -> Developer -> (Re)load plugin -> TMRL Grab Data")

    if module is not None:
        # Only an explicitly loaded cocoon is instantiated; a failure here is
        # reported as a warning and does not change the exit code.
        try:
            print("\n⏳ Instantiating CocoonAgent (sanity check)...")
            agent = module.CocoonAgent()
            brain_count = len(getattr(agent, 'brains', []) or [])
            print(f"✅ CocoonAgent instantiated (brains={brain_count})")
        except Exception as e:
            print(f"⚠️ CocoonAgent instantiation failed: {e}")

    print("\nDoctor done.")
    return 0
|
| 245 |
+
|
| 246 |
+
# Local cocoon import - flexible loading
|
| 247 |
+
# Cocoon loader state: the availability flag and the agent class. Both keep
# these placeholder values until _try_load_cocoon() succeeds.
COCOON_AVAILABLE, CocoonAgent = False, None
|
| 249 |
+
|
| 250 |
+
def _try_load_cocoon(quiet: bool = True, scan_exports: bool = False):
|
| 251 |
+
"""Try various methods to load a cocoon.
|
| 252 |
+
|
| 253 |
+
This module is often imported for its helpers; avoid printing warnings at
|
| 254 |
+
import-time unless explicitly requested.
|
| 255 |
+
"""
|
| 256 |
+
global COCOON_AVAILABLE, CocoonAgent
|
| 257 |
+
|
| 258 |
+
# Method 1: Local cocoon.py in same folder
|
| 259 |
+
try:
|
| 260 |
+
import cocoon as cocoon_module
|
| 261 |
+
_ensure_json_default(cocoon_module)
|
| 262 |
+
from cocoon import CocoonAgent as CA
|
| 263 |
+
CocoonAgent = CA
|
| 264 |
+
COCOON_AVAILABLE = True
|
| 265 |
+
return
|
| 266 |
+
except ImportError:
|
| 267 |
+
pass
|
| 268 |
+
|
| 269 |
+
# Method 2: (optional) Look for exported cocoon_ensemble_*.py files
|
| 270 |
+
import glob
|
| 271 |
+
import importlib.util
|
| 272 |
+
import os
|
| 273 |
+
cocoon_files = glob.glob("cocoon_ensemble_*.py") if scan_exports else []
|
| 274 |
+
# Skip ourselves to prevent infinite recursion!
|
| 275 |
+
my_name = os.path.basename(__file__)
|
| 276 |
+
cocoon_files = [cf for cf in cocoon_files if os.path.basename(cf) != my_name]
|
| 277 |
+
for cf in cocoon_files:
|
| 278 |
+
try:
|
| 279 |
+
spec = importlib.util.spec_from_file_location("cocoon", cf)
|
| 280 |
+
module = importlib.util.module_from_spec(spec)
|
| 281 |
+
spec.loader.exec_module(module)
|
| 282 |
+
_ensure_json_default(module)
|
| 283 |
+
if hasattr(module, 'CocoonAgent'):
|
| 284 |
+
CocoonAgent = module.CocoonAgent
|
| 285 |
+
COCOON_AVAILABLE = True
|
| 286 |
+
print(f"[OK] Loaded cocoon from: {cf}")
|
| 287 |
+
return
|
| 288 |
+
except Exception:
|
| 289 |
+
continue
|
| 290 |
+
|
| 291 |
+
if not quiet:
|
| 292 |
+
print("[!] No cocoon found. Pass --cocoon path/to/cocoon.py or place cocoon.py in this folder.")
|
| 293 |
+
|
| 294 |
+
# Import-time probe: stay silent and only try a directly importable cocoon.py
# (exported cocoon_ensemble_*.py files are not scanned at this point).
_try_load_cocoon(scan_exports=False, quiet=True)
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def _print_basic_setup_instructions():
|
| 298 |
+
print("\nSETUP (super simple):")
|
| 299 |
+
print(" 1) Install TMRL into *this* Python:")
|
| 300 |
+
print(" python -m pip install tmrl")
|
| 301 |
+
print(" 2) Install/launch TrackMania 2020")
|
| 302 |
+
print(" 3) Install OpenPlanet + enable the TMRL plugin (\"TMRL Grab Data\")")
|
| 303 |
+
print(" 4) Start a track (NOT the menus), then run:")
|
| 304 |
+
print(" python cocoon_tmrl_adapter.py --drive --cocoon D:\\path\\to\\cocoon_*.py")
|
| 305 |
+
print("\nIf you're stuck, run:")
|
| 306 |
+
print(" python cocoon_tmrl_adapter.py --doctor --cocoon D:\\path\\to\\cocoon_*.py")
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
# =============================================================================
|
| 310 |
+
# URGENCY MODULATOR - Time Pressure System
|
| 311 |
+
# =============================================================================
|
| 312 |
+
|
| 313 |
+
@dataclass
class UrgencyModulator:
    """
    Exponential urgency pressure that teaches organisms time-awareness.

    As time elapses toward expected_time, urgency increases exponentially
    from 1.0 up to e^alpha, where it saturates (time pressure is clamped
    at 1.0).
    Positive rewards are diminished (less reward for slow progress).
    Negative rewards are amplified (more punishment when time is short).

    get_observation_signals() exposes the same quantities as a dict so the
    caller can inject them into the observation space.
    """
    expected_time: float = 60.0  # Expected track completion time (seconds)
    alpha: float = 2.0  # Exponential curve steepness
    step_duration: float = 0.05  # Approximate seconds per step (TMRL default ~20Hz)

    # Runtime state
    elapsed_steps: int = 0
    episode_start_time: float = 0.0

    def reset(self):
        """Reset the step counter and record the wall-clock episode start."""
        import time
        self.elapsed_steps = 0
        self.episode_start_time = time.time()

    def step(self) -> float:
        """Advance one step, return current urgency multiplier."""
        self.elapsed_steps += 1
        return self.get_urgency()

    def get_elapsed_time(self) -> float:
        """Get elapsed time in seconds (estimated as steps * step_duration)."""
        return self.elapsed_steps * self.step_duration

    def get_time_pressure(self) -> float:
        """Get normalized time pressure (0.0 = just started, 1.0 = at or past expected time).

        A non-positive expected_time is treated as "deadline already reached"
        and yields 1.0 instead of dividing by zero.
        """
        if self.expected_time <= 0:
            return 1.0
        return min(1.0, self.get_elapsed_time() / self.expected_time)

    def get_urgency(self) -> float:
        """
        Get exponential urgency multiplier, exp(alpha * time_pressure).

        At t=0: urgency = 1.0 (no pressure)
        At t=expected/2: urgency = e^(alpha/2) (~2.7 for alpha=2.0)
        At t>=expected: urgency = e^alpha (~7.4 for alpha=2.0); it does not
        grow further because time pressure is clamped at 1.0.
        """
        import math
        pressure = self.get_time_pressure()
        return math.exp(self.alpha * pressure)

    def shape_reward(self, base_reward: float) -> float:
        """
        Shape reward based on urgency.

        Positive rewards: divided by urgency (slow progress = less reward)
        Negative rewards: multiplied by urgency (crashes are worse when time is short)
        Zero rewards: replaced by -0.001 * urgency (standing still costs more over time)
        """
        urgency = self.get_urgency()

        if base_reward > 0:
            # Diminish positive rewards as urgency increases
            return base_reward / urgency
        elif base_reward < 0:
            # Amplify negative rewards as urgency increases
            return base_reward * urgency
        else:
            # Zero reward = slight negative pressure (standing still is bad)
            # Scale: -0.001 at start, -0.001 * e^alpha (~-0.0074 for
            # alpha=2.0) once the expected time is reached
            return -0.001 * urgency

    def get_observation_signals(self) -> Dict[str, float]:
        """Get urgency signals to inject into observation."""
        pressure = self.get_time_pressure()
        return {
            'time_pressure': pressure,
            'urgency_multiplier': self.get_urgency(),
            'elapsed_steps': float(self.elapsed_steps),
            'remaining_ratio': max(0.0, 1.0 - pressure),
        }
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
# =============================================================================
|
| 396 |
+
# TRAINABLE ADAPTERS - Bridge TMRL observations to organism brains
|
| 397 |
+
# =============================================================================
|
| 398 |
+
|
| 399 |
+
class InputAdapter(torch.nn.Module):
    """
    Learnable projection from TMRL observations to organism-sized feature vectors.

    According to the original notes, TMRL sends ~83 floats (LIDAR rays,
    speed, etc.) while organism brains expect ~28 floats (Pong-style
    features); both sizes are constructor arguments here.

    The network is Linear -> ReLU -> Linear -> ReLU -> Linear -> Tanh, so
    outputs lie in [-1, 1].
    """

    def __init__(self, tmrl_obs_dim: int, organism_input_dim: int, hidden_dim: int = 64):
        super().__init__()
        self.tmrl_obs_dim = tmrl_obs_dim
        self.organism_input_dim = organism_input_dim

        nn = torch.nn
        stack = [
            nn.Linear(tmrl_obs_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, organism_input_dim),
            nn.Tanh(),  # Squash to [-1, 1] like game observations
        ]
        self.net = nn.Sequential(*stack)

        # Small initial weights (Xavier with gain 0.5) and zero biases for stability
        for layer in stack:
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_uniform_(layer.weight, gain=0.5)
            nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map observations ``x`` through the MLP and return the result."""
        features = self.net(x)
        return features
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
class OutputAdapter(torch.nn.Module):
    """
    Learnable mapping from organism action outputs to TMRL controls.

    According to the original notes, organism brains output 4 discrete
    action probabilities (gas, brake, left, right) while TMRL expects
    continuous [gas, brake, steer]. The network is Linear -> ReLU -> Linear
    with three outputs that are squashed in ``forward``.
    """

    def __init__(self, organism_output_dim: int = 4, hidden_dim: int = 32):
        super().__init__()
        self.organism_output_dim = organism_output_dim

        nn = torch.nn
        stack = [
            nn.Linear(organism_output_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 3),  # [gas, brake, steer]
        ]
        self.net = nn.Sequential(*stack)

        # Xavier weights with gain 0.5 and zero biases: an all-zero input
        # then yields gas=0.5, brake=0.5, steer=0.0
        for layer in stack:
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_uniform_(layer.weight, gain=0.5)
            nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``[gas, brake, steer]`` along the last dimension.

        gas and brake pass through a sigmoid (range [0, 1]); steer passes
        through tanh (range [-1, 1]).
        """
        gas_logit, brake_logit, steer_logit = self.net(x).unbind(dim=-1)
        controls = (
            torch.sigmoid(gas_logit),
            torch.sigmoid(brake_logit),
            torch.tanh(steer_logit),
        )
        return torch.stack(controls, dim=-1)
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
# =============================================================================
|
| 471 |
+
# COCOON ACTOR MODULE - Bridge to TMRL
|
| 472 |
+
# =============================================================================
|
| 473 |
+
|
| 474 |
+
class CocoonActorModule:
    """
    Wraps a Convergence Engine organism brain as a TMRL-compatible actor.

    This allows Highlander-trained organisms to drive in TrackMania.
    Implements the same interface as TMRL's ActorModule without inheriting from it.

    Now includes TRAINABLE ADAPTERS that learn to translate:
    - TMRL observations → organism-compatible features
    - Organism outputs → TMRL continuous controls

    The adapters are built lazily on the first ``act()`` call, because the
    flattened observation width (including optional urgency signals) is only
    known once an observation has been seen.
    """

    def __init__(self,
                 observation_space,
                 action_space,
                 cocoon_agent: Optional['CocoonAgent'] = None,
                 organism_idx: int = 0,
                 device: str = "cpu",
                 use_adapters: bool = True,
                 freeze_brains: bool = True):
        """
        Args:
            observation_space: TMRL observation space
            action_space: TMRL action space (gas, brake, steer)
            cocoon_agent: Your exported CocoonAgent
            organism_idx: Which organism brain to use (0 = ensemble, >0 = specific)
            device: "cpu" or "cuda"
            use_adapters: Use trainable input/output adapters (required for good performance!)
            freeze_brains: Freeze organism brains, only train adapters (recommended)
        """
        self.observation_space = observation_space
        self.action_space = action_space

        self.cocoon = cocoon_agent or CocoonAgent()
        self.organism_idx = organism_idx
        self.device = device
        self.use_adapters = use_adapters
        self.freeze_brains = freeze_brains

        # Action space info
        self.act_dim = action_space.shape[0]  # Usually 3: gas, brake, steer
        self.act_low = action_space.low
        self.act_high = action_space.high

        # Urgency modulator (set externally for time-pressure signaling)
        self.urgency: Optional['UrgencyModulator'] = None

        # Get the brain; an out-of-range index falls back to the ensemble.
        if 0 < organism_idx <= len(self.cocoon.brains):
            self.brain = self.cocoon.brains[organism_idx - 1]
            print(f"🧠 Using organism #{organism_idx} brain")
        else:
            self.brain = None  # Use ensemble voting
            print(f"🧠 Using ensemble voting ({len(self.cocoon.brains)} brains)")

        # Get brain architecture info
        sample_brain = self.cocoon.brains[0]
        # Remember whether the brain actually declares its input width: the
        # no-adapter fallback only reshapes observations when it does, so a
        # guessed default never truncates a state the brain could consume.
        self._brain_input_dim = getattr(sample_brain, 'input_dim', None)
        self.organism_input_dim = getattr(sample_brain, 'input_dim', 30)
        self.organism_output_dim = getattr(sample_brain, 'output_dim', 4)

        # Move all brains to device
        for brain in self.cocoon.brains:
            brain.to(device)
            if freeze_brains:
                brain.eval()
                for param in brain.parameters():
                    param.requires_grad = False

        # Initialize adapters (created lazily when we know obs dimension)
        self.input_adapter: Optional['InputAdapter'] = None
        self.output_adapter: Optional['OutputAdapter'] = None
        self._obs_dim_detected = False
        # Adapter state dicts handed to load() before the adapters exist;
        # applied by _ensure_adapters() right after it builds them.
        self._pending_adapter_states = {}

        if freeze_brains:
            print(f" 🔒 Brains frozen (only adapters train)")
        else:
            print(f" 🔓 Full fine-tuning enabled")

    def _ensure_adapters(self, obs_dim: int):
        """Create adapters once we know the observation dimension.

        Only the first call has any effect. Adapter weights that ``load()``
        had to defer are applied immediately after construction.
        """
        if self._obs_dim_detected:
            return

        if self.use_adapters:
            self.input_adapter = InputAdapter(
                tmrl_obs_dim=obs_dim,
                organism_input_dim=self.organism_input_dim,
                hidden_dim=64
            ).to(self.device)

            self.output_adapter = OutputAdapter(
                organism_output_dim=self.organism_output_dim,
                hidden_dim=32
            ).to(self.device)

            print(f" 🔧 Input adapter: {obs_dim} → {self.organism_input_dim}")
            print(f" 🔧 Output adapter: {self.organism_output_dim} → 3 (gas/brake/steer)")

            self._apply_pending_adapter_states()

        self._obs_dim_detected = True

    def _apply_pending_adapter_states(self):
        """Load adapter weights that load() received before the adapters existed."""
        pending = getattr(self, '_pending_adapter_states', None)
        if not pending:
            return
        for attr, label in (('input_adapter', 'input'), ('output_adapter', 'output')):
            state = pending.pop(attr, None)
            if state is not None:
                getattr(self, attr).load_state_dict(state)
                print(f" ✅ Loaded trained {label} adapter")

    def _preprocess_obs(self, obs, include_urgency: bool = True) -> np.ndarray:
        """Convert TMRL observation to flat numpy array, optionally with urgency signals.

        Tuple and dict observations are flattened component by component
        (scalars are accepted as components); anything else is converted
        with ``np.asarray``. When ``include_urgency`` is true and an urgency
        modulator is attached, three urgency features are appended.
        """
        if isinstance(obs, tuple):
            # Tuple observation (e.g., LIDAR + speed + previous actions)
            base = np.concatenate(
                [np.asarray(o).flatten() for o in obs]
            ).astype(np.float32)
        elif isinstance(obs, dict):
            # np.asarray lets scalar dict values through; calling .flatten()
            # on them directly would raise AttributeError.
            base = np.concatenate(
                [np.asarray(v).flatten() for v in obs.values()]
            ).astype(np.float32)
        else:
            base = np.asarray(obs, dtype=np.float32).flatten()

        # Append urgency signals if available
        if include_urgency and self.urgency is not None:
            signals = self.urgency.get_observation_signals()
            urgency_vec = np.array([
                signals['time_pressure'],
                signals['urgency_multiplier'] / 10.0,  # Normalize (~0.1 to ~1.0)
                signals['remaining_ratio'],
            ], dtype=np.float32)
            base = np.concatenate([base, urgency_vec])

        return base

    def _action_to_trackmania(self, raw_action: np.ndarray) -> np.ndarray:
        """
        Convert organism output to TrackMania controls.

        TrackMania expects:
        - gas: 0 to 1
        - brake: 0 to 1
        - steer: -1 to 1

        Organism outputs action probabilities for 4 discrete actions.
        We treat THROTTLE and STEERING as INDEPENDENT axes:

        Throttle axis: GAS (0) vs BRAKE (1)
        Steering axis: LEFT (2) vs RIGHT (3)

        This allows the ensemble to vote on throttle and steering separately!
        """
        # Softmax over (at most) the first four outputs; atleast_1d keeps a
        # fully squeezed single-output vector sliceable.
        logits = np.atleast_1d(np.asarray(raw_action))[:4]
        logits = logits - np.max(logits)  # Numerical stability
        probs = np.exp(logits)
        probs = probs / (probs.sum() + 1e-8)

        # Ensure we have 4 values
        if len(probs) < 4:
            probs = np.pad(probs, (0, 4 - len(probs)), constant_values=0.0)

        gas_prob, brake_prob, left_prob, right_prob = probs[0], probs[1], probs[2], probs[3]

        # THROTTLE: Default to GAS for exploration
        # Only brake when brake_prob significantly exceeds gas_prob
        gas = 0.9  # Default: strong gas for exploration
        brake = 0.0

        # Brake only activates when brake_prob > gas_prob + threshold
        brake_margin = brake_prob - gas_prob
        if brake_margin > 0.1:  # Needs 10% margin to start braking
            brake = min(0.8, brake_margin * 2.0)  # Scale brake strength
            gas = max(0.3, 0.9 - brake_margin)  # Reduce gas when braking

        # STEERING: LEFT is negative, RIGHT is positive
        steer = np.clip((right_prob - left_prob) * 2.5, -1.0, 1.0)  # Scale up for responsiveness

        # Reduce gas slightly when steering hard
        steer_intensity = abs(steer)
        if steer_intensity > 0.3:
            gas = gas * (1.0 - 0.2 * steer_intensity)

        return np.array([gas, brake, steer], dtype=np.float32)

    def _fit_to_brain_input(self, state_tensor: torch.Tensor) -> torch.Tensor:
        """Truncate or zero-pad the last dimension to the brain's declared input width.

        Used only on the no-adapter path. The tensor is returned unchanged
        when the brain did not declare ``input_dim`` or the widths already match.
        """
        target = getattr(self, '_brain_input_dim', None)
        if target is None:
            return state_tensor
        current = state_tensor.shape[-1]
        if current > target:
            return state_tensor[..., :target]
        if current < target:
            return torch.nn.functional.pad(state_tensor, (0, target - current))
        return state_tensor

    def act(self, obs, test: bool = False) -> np.ndarray:
        """
        Compute action from observation.

        Args:
            obs: TMRL observation (LIDAR, speed, etc.)
            test: True during evaluation, False during training
                (accepted for interface compatibility; not read here)

        Returns:
            np.ndarray: [gas, brake, steer] actions
        """
        # Preprocess observation to flat array
        state = self._preprocess_obs(obs)

        # Ensure adapters are initialized
        self._ensure_adapters(len(state))

        # Convert to tensor
        state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0).to(self.device)

        # This class defines no `training` flag itself, so gradients stay
        # disabled here unless one is attached to the instance externally.
        grad_enabled = getattr(self, 'training', False)

        # Apply input adapter if using adapters
        if self.use_adapters and self.input_adapter is not None:
            with torch.set_grad_enabled(grad_enabled):
                adapted_state = self.input_adapter(state_tensor)
        else:
            # Fallback: just truncate/pad to match brain input
            adapted_state = self._fit_to_brain_input(state_tensor)

        # Get action from brain(s)
        with torch.no_grad():
            if self.brain is not None:
                # Single organism
                output = self.brain(adapted_state, return_language_logits=False)
                if isinstance(output, tuple):
                    output = output[0]
                brain_output = output
                winning_action = int(torch.argmax(output[:, :4]).item())
                vote_counts = {winning_action: 1}
                self._last_avg_probs = output[0, :4].cpu().numpy()
            else:
                # Ensemble: average all brain outputs
                from collections import Counter

                all_outputs = []
                votes = []

                for brain in self.cocoon.brains:
                    output = brain(adapted_state, return_language_logits=False)
                    if isinstance(output, tuple):
                        output = output[0]
                    all_outputs.append(output)
                    votes.append(int(torch.argmax(output[:, :4]).item()))

                vote_counts = Counter(votes)
                # Average all outputs
                brain_output = torch.mean(torch.stack(all_outputs), dim=0)
                self._last_avg_probs = brain_output[0, :4].cpu().numpy()

        # Apply output adapter if using adapters
        if self.use_adapters and self.output_adapter is not None:
            with torch.set_grad_enabled(grad_enabled):
                # The adapter is built for organism_output_dim inputs, so
                # slice to that width rather than a hard-coded 4.
                action_tensor = self.output_adapter(brain_output[:, :self.organism_output_dim])
            action = action_tensor.cpu().detach().numpy().squeeze()
        else:
            # Fallback: use heuristic mapping
            raw_action = brain_output.cpu().numpy().squeeze()
            action = self._action_to_trackmania(raw_action)

        # Granular debug output
        self._step_count = getattr(self, '_step_count', 0) + 1
        if self._step_count % 5 == 0:  # Every 5 steps
            vote_str = ' '.join([f"{k}:{v}" for k, v in sorted(vote_counts.items())])
            avg_probs = self._last_avg_probs
            prob_str = f"G:{avg_probs[0]:.0%} B:{avg_probs[1]:.0%} L:{avg_probs[2]:.0%} R:{avg_probs[3]:.0%}"
            adapter_str = "🔧" if self.use_adapters else "⚠️"
            print(f" [{self._step_count:3d}] {adapter_str} Votes: {vote_str} | Avg: {prob_str} → gas={action[0]:.2f} brake={action[1]:.2f} steer={action[2]:+.2f}")

        return action

    def get_trainable_parameters(self):
        """Get parameters that should be trained (adapters only if brains frozen).

        Adapters are created on the first ``act()`` call; before that they
        contribute no parameters to the returned list.
        """
        params = []
        if self.input_adapter is not None:
            params.extend(self.input_adapter.parameters())
        if self.output_adapter is not None:
            params.extend(self.output_adapter.parameters())
        if not self.freeze_brains:
            for brain in self.cocoon.brains:
                params.extend(brain.parameters())
        return params

    def save(self, path):
        """Save the actor module including trained adapters.

        Writes the configuration flags and, when the adapters exist, their
        state dicts. Brain weights are not part of this file.
        """
        save_data = {
            'organism_idx': self.organism_idx,
            'device': self.device,
            'use_adapters': self.use_adapters,
            'freeze_brains': self.freeze_brains,
        }
        if self.input_adapter is not None:
            save_data['input_adapter_state'] = self.input_adapter.state_dict()
        if self.output_adapter is not None:
            save_data['output_adapter_state'] = self.output_adapter.state_dict()
        torch.save(save_data, path)
        print(f"💾 Saved actor module with adapters to {path}")

    def load(self, path, device):
        """Load the actor module including trained adapters.

        If the adapters have not been built yet (no ``act()`` call so far),
        their saved weights are kept and applied as soon as the adapters are
        created, instead of being silently discarded.

        Returns:
            self, to allow chaining.
        """
        data = torch.load(path, map_location=device)
        self.organism_idx = data.get('organism_idx', 0)
        self.device = device

        if getattr(self, '_pending_adapter_states', None) is None:
            self._pending_adapter_states = {}

        # Load adapter states if present
        for key, attr, label in (
            ('input_adapter_state', 'input_adapter', 'input'),
            ('output_adapter_state', 'output_adapter', 'output'),
        ):
            if key not in data:
                continue
            adapter = getattr(self, attr)
            if adapter is not None:
                adapter.load_state_dict(data[key])
                print(f" ✅ Loaded trained {label} adapter")
            else:
                self._pending_adapter_states[attr] = data[key]
                print(f" ⏳ Trained {label} adapter weights will be applied once adapters are built")
        return self
|
| 778 |
+
|
| 779 |
+
|
| 780 |
+
# =============================================================================
|
| 781 |
+
# TMRL WORKER FACTORY
|
| 782 |
+
# =============================================================================
|
| 783 |
+
|
| 784 |
+
def create_tmrl_worker(
    cocoon_agent: Optional['CocoonAgent'] = None,
    organism_idx: int = 0,
    server_ip: str = "127.0.0.1",
    server_port: int = 6666,
    run_name: str = "cocoon_trackmania",
    device: str = "cpu"
) -> 'RolloutWorker':
    """
    Create a TMRL RolloutWorker using a cocoon organism.

    Args:
        cocoon_agent: Your CocoonAgent (a default CocoonAgent() is built if None)
        organism_idx: Which organism to use (0 = ensemble)
        server_ip: TMRL server IP
        server_port: TMRL server port
        run_name: Name for this run; also names the ``<run_name>.tmod`` model file
        device: "cpu" or "cuda"

    Returns:
        RolloutWorker ready to collect samples in TrackMania

    Raises:
        RuntimeError: If TMRL is not installed.
    """
    from pathlib import Path

    if not TMRL_AVAILABLE:
        raise RuntimeError("TMRL not installed. Run: pip install tmrl")

    # Load cocoon if not provided
    agent = cocoon_agent or CocoonAgent()

    # Create actor module factory; every actor it builds shares `agent`.
    def actor_module_cls(observation_space, action_space):
        return CocoonActorModule(
            observation_space=observation_space,
            action_space=action_space,
            cocoon_agent=agent,
            organism_idx=organism_idx,
            device=device
        )

    # Environment (TrackMania with LIDAR)
    # NOTE(review): confirm that cfg.ENV_CONFIG is the dict the
    # "real-time-gym-v1" environment expects under "config".
    env_cls = partial(
        GenericGymEnv,
        id="real-time-gym-v1",
        gym_kwargs={"config": cfg.ENV_CONFIG}
    )

    # Paths: Path() makes the join work whether WEIGHTS_FOLDER is a str or a Path.
    model_path = str(Path(cfg.WEIGHTS_FOLDER) / (run_name + ".tmod"))

    # Create worker
    return RolloutWorker(
        env_cls=env_cls,
        actor_module_cls=actor_module_cls,
        sample_compressor=None,
        device=device,
        server_ip=server_ip,
        server_port=server_port,
        password=cfg.PASSWORD,
        max_samples_per_episode=1000,
        model_path=model_path,
        crc_debug=False
    )
|
| 848 |
+
|
| 849 |
+
|
| 850 |
+
# =============================================================================
|
| 851 |
+
# STANDALONE TRACKMANIA DRIVER
|
| 852 |
+
# =============================================================================
|
| 853 |
+
|
| 854 |
+
def drive_trackmania(
    cocoon_agent: Optional['CocoonAgent'] = None,
    organism_idx: int = 0,
    episodes: int = 10,
    render: bool = True,
    device: str = "cpu",
    enable_training: bool = False,
    learning_rate: float = 1e-4,
    batch_size: int = 32,
    gamma: float = 0.99,
    train_every: int = 4,
    save_every: int = 10,
    save_path: Optional[str] = None,
    track_time: float = 60.0,
    urgency_alpha: float = 2.0
) -> Dict[str, Any]:
    """
    Drive in TrackMania using a cocoon organism (standalone mode).
    Optionally train in-place while driving, using a background worker thread.

    Args:
        cocoon_agent: Your CocoonAgent
        organism_idx: Which organism (0 = ensemble)
        episodes: Number of episodes to run
        render: Show the game (should be True for TrackMania); not read by this function
        device: "cpu" or "cuda"
        enable_training: If True, collect experience and run background updates during the drive
        learning_rate: Optimizer learning rate when training
        batch_size: Replay samples per gradient step
        gamma: Reward discount for returns
        train_every: Steps between optimization passes
        save_every: Episodes between checkpoint saves
        save_path: Optional custom export path for trained cocoon
        track_time: Expected track time in seconds, passed to the UrgencyModulator
        urgency_alpha: ``alpha`` passed to the UrgencyModulator

    Returns:
        Dict with ``'episodes'`` (per-episode metrics) and ``'training'``
        (training summary, or None when training is disabled). If the
        TrackMania environment cannot be created, an empty list is returned
        instead.

    Raises:
        RuntimeError: If TMRL is not installed.
    """
    if not TMRL_AVAILABLE:
        raise RuntimeError("TMRL not installed. Run: pip install tmrl")

    # Not referenced below; kept so a missing gymnasium install still fails
    # here, before any window handling starts.
    import gymnasium as gym  # noqa: F401
    import subprocess
    import time as time_module

    # Helper to check if OpenPlanet is sending data (meaning we're on a track)
    def check_openplanet_ready():
        return _check_openplanet_ready(timeout_s=2.0)

    # Helper to launch TrackMania and locate its window
    def launch_and_focus_trackmania():
        """Launch TrackMania via Ubisoft Connect if needed.

        Returns:
            (hwnd, game_was_running); (None, False) when the game could not
            be found or started.
        """
        try:
            import ctypes
            from ctypes import wintypes

            # Find TrackMania window
            user32 = ctypes.windll.user32

            def find_window(title_part):
                """Find window by partial title match."""
                hwnd_found = [None]

                def enum_callback(hwnd, lparam):
                    length = user32.GetWindowTextLengthW(hwnd)
                    if length > 0:
                        buff = ctypes.create_unicode_buffer(length + 1)
                        user32.GetWindowTextW(hwnd, buff, length + 1)
                        if title_part.lower() in buff.value.lower():
                            hwnd_found[0] = hwnd
                            return False  # Stop enumeration
                    return True

                WNDENUMPROC = ctypes.WINFUNCTYPE(ctypes.c_bool, wintypes.HWND, wintypes.LPARAM)
                user32.EnumWindows(WNDENUMPROC(enum_callback), 0)
                return hwnd_found[0]

            # Check if TrackMania is running
            hwnd = find_window("Trackmania")
            game_was_running = hwnd is not None

            if not hwnd:
                print("🚀 Launching TrackMania...")
                # Try Ubisoft Connect URI
                subprocess.Popen(
                    ["cmd", "/c", "start", "uplay://launch/5595/0"],
                    shell=False,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL
                )

                # Wait for game to start
                for _ in range(60):  # 60 second timeout
                    time_module.sleep(1)
                    hwnd = find_window("Trackmania")
                    if hwnd:
                        print("✅ TrackMania launched!")
                        print("⏳ Waiting for OpenPlanet to load (15s)...")
                        time_module.sleep(15)  # OpenPlanet needs time to initialize
                        break
                else:
                    # Loop ran to completion: the window never appeared.
                    print("[!] TrackMania did not start. Please launch manually.")
                    return None, False
            else:
                print("✅ TrackMania already running")

            # DON'T auto-focus - let user control when to switch
            # (focus stealing is annoying)

            return hwnd, game_was_running

        except Exception as e:
            print(f"[!] Could not auto-launch TrackMania: {e}")
            print(" Please launch TrackMania manually and focus the window.")
            return None, False

    # Load cocoon
    agent = cocoon_agent or CocoonAgent()

    # Launch TrackMania first (the window handle itself is not needed below)
    _hwnd, game_was_running = launch_and_focus_trackmania()

    # Check if already on a track (OpenPlanet sending data)
    if game_was_running:
        print("🔍 Checking if you're on a track...")
        if check_openplanet_ready():
            print("✅ Already on a track! Starting immediately...")
        else:
            print("\n⚠️ You're in menus. Please start a race/track.")
            print(" Press ENTER when you're on a track and ready...")
            input()
    else:
        # Fresh launch - need to wait for user to get to a track
        print("\n⚠️ IMPORTANT: You must be ON A TRACK (not in menus)!")
        print(" Start any race/track, then the organisms will take over.")
        print(" Press ENTER when you're on a track and ready...")
        input()

    # CRITICAL: Focus TrackMania window BEFORE sending inputs
    print("🎯 Focusing TrackMania window...")
    time_module.sleep(0.3)

    try:
        # Use VBScript AppActivate - simple and reliable
        vbs = 'CreateObject("WScript.Shell").AppActivate "Trackmania"'
        focus_result = subprocess.run(
            ["cscript", "//nologo", "//e:vbscript"],
            input=vbs, capture_output=True, text=True, timeout=3
        )
        if focus_result.returncode == 0:
            print(" ✓ TrackMania focused!")
            time_module.sleep(0.5)
        else:
            print(" ⚠️ Could not auto-focus")
            print(" >>> CLICK ON TRACKMANIA NOW (3 sec)! <<<")
            time_module.sleep(3)
    except Exception as e:
        print(f" ⚠️ Focus failed: {e}")
        print(" >>> CLICK ON TRACKMANIA NOW (3 sec)! <<<")
        time_module.sleep(3)

    # Create TrackMania environment using LIDAR interface directly
    try:
        print("🔗 Connecting to TrackMania...")

        # Just use TMRL's built-in environment
        from tmrl import get_environment

        print(" 📦 Calling get_environment()...")
        env = get_environment()
        print(" ✅ Environment created")

        # User must reload plugin after TMRL resizes window
        print("\n" + "="*60)
        print(" ⚠️ TMRL may have resized your window")
        print(" If OpenPlanet stopped working:")
        print(" 1. Press F3")
        print(" 2. Developer → (Re)load plugin → TMRL Grab Data")
        print(" 3. Press F3 to close")
        print("="*60)
        input("\n Press ENTER to continue...")
        print()

        print("✅ Connected to TrackMania!")
    except Exception as e:
        print(f"[!] Could not create TrackMania environment: {e}")
        print(" Make sure TrackMania 2020 is running with OpenPlanet plugin.")
        import traceback
        traceback.print_exc()
        # Kept as an empty list for backward compatibility with callers
        # that test the result with isinstance(result, dict).
        return []

    # Create actor with adapters
    actor = CocoonActorModule(
        observation_space=env.observation_space,
        action_space=env.action_space,
        cocoon_agent=agent,
        organism_idx=organism_idx,
        device=device,
        use_adapters=True,  # Enable trainable adapters!
        freeze_brains=True  # Freeze brains, only train adapters
    )

    # Create urgency modulator for time-pressure awareness
    urgency = UrgencyModulator(
        expected_time=track_time,
        alpha=urgency_alpha,
        step_duration=0.05  # ~20Hz TMRL default
    )
    actor.urgency = urgency
    print(f"⏱️ Urgency system: {track_time}s expected, α={urgency_alpha}")

    training_summary = None
    brains_to_train: List[Any] = []
    optimizers: List[Any] = []
    experience_buffer = None
    buffer_lock = None
    train_signal = None
    training_stop_event = None
    training_thread = None
    training_losses: List[float] = []
    training_episode_rewards: List[float] = []
    best_reward = float('-inf')
    if enable_training:
        from collections import deque

        # The actor builds its adapters lazily on the first act() call. Build
        # them now from a sampled observation (urgency is already attached,
        # so the flattened width matches what act() will see); otherwise the
        # parameter list below is empty and the optimizer would be created
        # over the frozen brains instead of the adapters.
        try:
            sample_obs = env.observation_space.sample()
            actor._ensure_adapters(len(actor._preprocess_obs(sample_obs)))
        except Exception as e:
            print(f"[!] Could not pre-build adapters from the observation space: {e}")

        # Get trainable parameters (adapters only since brains are frozen)
        trainable_params = actor.get_trainable_parameters()
        if trainable_params:
            print(f"🧠 Training adapters ({sum(p.numel() for p in trainable_params)} parameters)")
            optimizers = [torch.optim.Adam(trainable_params, lr=learning_rate)]
            brains_to_train = [actor]  # Train actor (which contains adapters)
        else:
            # Fallback to training brains directly
            if 0 < organism_idx <= len(agent.brains):
                brains_to_train = [agent.brains[organism_idx - 1]]
                print(f"🧠 Training organism #{organism_idx} brain directly")
            else:
                brains_to_train = agent.brains
                print(f"🧠 Training ALL {len(brains_to_train)} organism brains")
            optimizers = [torch.optim.Adam(brain.parameters(), lr=learning_rate) for brain in brains_to_train]

        experience_buffer = deque(maxlen=10000)
        buffer_lock = threading.Lock()
        train_signal = queue.Queue()
        training_stop_event = threading.Event()
        training_thread = threading.Thread(
            target=_training_worker_adapters,
            args=(
                training_stop_event,
                experience_buffer,
                buffer_lock,
                train_signal,
                actor,
                optimizers,
                batch_size,
                gamma,
                device,
                training_losses
            ),
            daemon=True
        )
        training_thread.start()
        print(f" Training mode: lr={learning_rate} batch={batch_size} γ={gamma} train_every={train_every}")
        if save_every:
            print(f" Checkpoints every {save_every} episode(s)")
        print()

    results = []

    print(f"\n🏎️ TRACKMANIA DRIVER")
    print(f" Organism: {'ensemble' if organism_idx == 0 else f'#{organism_idx}'}")
    print(f" Episodes: {episodes}")
    if enable_training:
        print(" Training: ENABLED (background updates mid-drive)")
    print()

    try:
        for ep in range(episodes):
            obs, info = env.reset()
            done = False
            total_reward = 0
            total_raw_reward = 0  # Track unshaped reward for comparison
            steps = 0
            reward_history = []  # Track rewards for debugging
            episode_experiences = [] if enable_training else None

            # Reset urgency for new episode
            urgency.reset()

            print(f"Episode {ep + 1}/{episodes}...")
            print(" [step] Vote breakdown → Action | controls | reward")
            print(" " + "─" * 60)

            while not done:
                state_for_training = _preprocess_obs_for_training(obs) if enable_training else None
                action = actor.act(obs, test=True)

                # Accept both the 5-tuple (terminated/truncated) and the
                # legacy 4-tuple step API.
                step_result = env.step(action)
                if len(step_result) == 5:
                    obs, reward, terminated, truncated, info = step_result
                    done = terminated or truncated
                else:
                    obs, reward, done, info = step_result

                # Apply urgency shaping to reward
                raw_reward = reward
                total_raw_reward += raw_reward
                shaped_reward = urgency.shape_reward(raw_reward)
                urgency.step()  # Advance urgency clock

                # Use shaped reward for training
                total_reward += shaped_reward
                reward_history.append(shaped_reward)
                steps += 1

                if enable_training and state_for_training is not None and episode_experiences is not None:
                    episode_experiences.append({
                        'state': state_for_training,
                        'reward': shaped_reward,  # Use shaped reward!
                        'raw_reward': raw_reward,
                        'urgency': urgency.get_urgency(),
                        'time_pressure': urgency.get_time_pressure(),
                        'done': done
                    })
                    if train_every > 0 and steps % train_every == 0 and train_signal is not None:
                        train_signal.put(1)

                # Show reward every 5 steps (synced with vote debug in actor.act)
                if steps % 5 == 0:
                    recent_rewards = reward_history[-5:]
                    avg_recent = sum(recent_rewards) / len(recent_rewards)
                    reward_trend = "📈" if avg_recent > 0 else "📉" if avg_recent < 0 else "➡️"
                    urg_pct = urgency.get_time_pressure() * 100
                    urg_mult = urgency.get_urgency()
                    # Also show speed if available (first tuple component)
                    speed_part = ""
                    try:
                        if isinstance(obs, tuple) and len(obs) > 0:
                            speed_val = float(obs[0][0]) if isinstance(obs[0], np.ndarray) else float(obs[0])
                            speed_part = f" speed={speed_val:.0f}"
                    except (IndexError, TypeError, ValueError):
                        speed_part = ""
                    print(f"{speed_part} reward={shaped_reward:+.3f} (avg: {avg_recent:+.3f}) {reward_trend} ⏱{urg_pct:.0f}% ×{urg_mult:.1f}")

            print(" " + "─" * 60)
            final_urg = urgency.get_urgency()
            elapsed = urgency.get_elapsed_time()
            print(f" ✓ Finished! Shaped Reward: {total_reward:+.2f} (raw: {total_raw_reward:+.2f}), Steps: {steps}")
            print(f" ⏱️ Time: {elapsed:.1f}s elapsed, final urgency: ×{final_urg:.1f}")

            # Show reward distribution
            positive_steps = sum(1 for r in reward_history if r > 0)
            negative_steps = sum(1 for r in reward_history if r < 0)
            zero_steps = sum(1 for r in reward_history if abs(r) < 1e-6)  # Near-zero after shaping
            print(f" Reward breakdown: +ve:{positive_steps} | -ve:{negative_steps} | ~zero:{zero_steps}")

            if enable_training and episode_experiences:
                if experience_buffer is not None and buffer_lock is not None:
                    with buffer_lock:
                        _add_episode_with_returns(experience_buffer, episode_experiences, gamma)
                elif experience_buffer is not None:
                    _add_episode_with_returns(experience_buffer, episode_experiences, gamma)
                training_episode_rewards.append(total_reward)
                avg_recent = np.mean(training_episode_rewards[-10:])
                print(f" Training stats → recent avg shaped reward: {avg_recent:+.2f}")
                improved = total_reward > best_reward
                if improved:
                    best_reward = total_reward
                # Periodic checkpoints are only written for episodes that
                # set a new best reward.
                if save_every and (ep + 1) % save_every == 0 and improved:
                    _save_trained_cocoon(agent, save_path, ep + 1)
                if train_signal is not None:
                    train_signal.put(1)

            results.append({
                'episode': ep + 1,
                'reward': total_reward,
                'steps': steps,
                'info': info
            })
    finally:
        # Release the environment and stop the worker even if an episode
        # raised (e.g. Ctrl+C mid-drive).
        env.close()

        # Stop the magnified viewer. Best effort: `viewer_running` and `cv2`
        # are not defined in this function, so a NameError here is expected
        # when no viewer exists and is deliberately ignored.
        try:
            viewer_running[0] = False
            cv2.destroyAllWindows()
        except Exception:
            pass

        if enable_training and training_thread is not None:
            training_stop_event.set()
            if train_signal is not None:
                train_signal.put(None)
            training_thread.join(timeout=5)

    # Summary (guarded so episodes=0 does not average an empty list)
    avg_reward = np.mean([r['reward'] for r in results]) if results else 0.0
    print(f"\n📊 Average reward: {avg_reward:.1f}")

    if enable_training:
        # NOTE(review): only `agent` is handed to _save_trained_cocoon; confirm
        # whether the actor's adapter weights also need saving (actor.save).
        _save_trained_cocoon(agent, save_path, episodes)
        final_recent = np.mean(training_episode_rewards[-10:]) if training_episode_rewards else avg_reward
        print("📚 Training summary")
        print(f" Episodes: {episodes}")
        print(f" Best reward: {best_reward:.1f}")
        print(f" Final avg(10): {final_recent:.1f}")
        training_summary = {
            'episode_rewards': training_episode_rewards,
            'training_losses': training_losses,
            'best_reward': best_reward,
            'final_avg_10': final_recent
        }

    return {
        'episodes': results,
        'training': training_summary
    }
|
| 1275 |
+
|
| 1276 |
+
|
| 1277 |
+
# =============================================================================
|
| 1278 |
+
# TRAINING MODE - Learn while driving!
|
| 1279 |
+
# =============================================================================
|
| 1280 |
+
|
| 1281 |
+
def train_in_trackmania(
    cocoon_agent: Optional['CocoonAgent'] = None,
    organism_idx: int = 0,
    episodes: int = 100,
    learning_rate: float = 1e-4,
    batch_size: int = 32,
    gamma: float = 0.99,
    train_every: int = 4,
    save_every: int = 10,
    save_path: Optional[str] = None,
    device: str = "cpu"
) -> Dict[str, Any]:
    """
    Run the shared TrackMania drive loop with training switched on.

    This is a thin convenience wrapper: every argument is forwarded
    unchanged to ``drive_trackmania`` together with ``render=True`` and
    ``enable_training=True``.

    Args:
        cocoon_agent: CocoonAgent to drive/train with (forwarded as-is).
        organism_idx: Organism index (the CLI documents 0 as the ensemble
            and 1+ as a specific organism).
        episodes: Number of episodes to run.
        learning_rate: Learning rate handed to the drive loop.
        batch_size: Experiences per training batch.
        gamma: Discount factor for rewards.
        train_every: Train after this many steps.
        save_every: Save cocoon every N episodes.
        save_path: Where to save the updated cocoon (None = auto).
        device: "cpu" or "cuda".

    Returns:
        The ``'training'`` entry of the dict returned by
        ``drive_trackmania``. If that call returns something that is not
        a dict, the value is passed through untouched.
    """
    drive_kwargs = {
        'cocoon_agent': cocoon_agent,
        'organism_idx': organism_idx,
        'episodes': episodes,
        'render': True,
        'device': device,
        'enable_training': True,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'gamma': gamma,
        'train_every': train_every,
        'save_every': save_every,
        'save_path': save_path,
    }
    outcome = drive_trackmania(**drive_kwargs)

    # Non-dict results are handed back verbatim rather than unpacked.
    if not isinstance(outcome, dict):
        return outcome
    return outcome.get('training')
|
| 1329 |
+
|
| 1330 |
+
|
| 1331 |
+
def _preprocess_obs_for_training(obs) -> np.ndarray:
    """Convert a TMRL observation into a flat float32 numpy array.

    Args:
        obs: Either a tuple/list of components, a dict of components, or a
            single array-like. Each component may be an ndarray of any
            shape or a plain scalar / nested sequence.

    Returns:
        A 1-D ``np.float32`` array. Tuple/list components are concatenated
        in order; dict components in the dict's iteration order. An empty
        tuple, list or dict yields an empty array instead of raising.
    """
    if isinstance(obs, dict):
        parts = list(obs.values())
    elif isinstance(obs, (tuple, list)):
        parts = list(obs)
    else:
        return np.asarray(obs, dtype=np.float32).flatten()

    # np.concatenate refuses an empty sequence, so handle it explicitly.
    if not parts:
        return np.zeros(0, dtype=np.float32)

    # np.asarray(...).ravel() treats arrays and bare scalars uniformly, so the
    # dict branch no longer crashes on values that lack a .flatten() method.
    flat = [np.asarray(part).ravel() for part in parts]
    return np.concatenate(flat).astype(np.float32)
|
| 1345 |
+
|
| 1346 |
+
|
| 1347 |
+
def _add_episode_with_returns(buffer, experiences, gamma):
    """Add episode experiences with computed returns (rewards-to-go).

    Args:
        buffer: Container with an ``append`` method that receives the
            experiences.
        experiences: Sequence of dicts for one episode, in time order; each
            must have a ``'reward'`` key.
        gamma: Discount factor applied per step.

    Side effects:
        Each experience dict is mutated in place: a ``'return'`` key holding
        the discounted reward-to-go is added before the dict is appended to
        ``buffer``.
    """
    returns = []
    R = 0
    # Accumulate back-to-front, then reverse once; inserting at index 0 on
    # every step would make this quadratic in the episode length.
    for exp in reversed(experiences):
        R = exp['reward'] + gamma * R
        returns.append(R)
    returns.reverse()

    for exp, ret in zip(experiences, returns):
        exp['return'] = ret
        buffer.append(exp)
|
| 1358 |
+
|
| 1359 |
+
|
| 1360 |
+
def _training_worker_adapters(stop_event, buffer, buffer_lock, signal_queue, actor, optimizers, batch_size, gamma, device, training_losses):
    """Background thread: trains ADAPTERS (not brains) using policy gradient.

    Blocks on ``signal_queue`` (polling every 0.5 s so ``stop_event`` is
    noticed). Each non-``None`` signal triggers one call to
    ``_train_step_adapters`` on a snapshot of ``buffer``; the resulting loss
    is appended to ``training_losses``.

    Args:
        stop_event: Event that ends the loop once set.
        buffer: Shared experience container; only read here, under the lock.
        buffer_lock: Lock guarding ``buffer``.
        signal_queue: Queue of wake-up signals; ``None`` is the shutdown
            sentinel.
        actor: Object whose adapters are trained.
        optimizers: Optimizers passed through to the train step.
        batch_size: Batch size passed through to the train step.
        gamma: Discount factor passed through to the train step.
        device: Torch device passed through to the train step.
        training_losses: List that collects one loss value per step.
    """
    while not stop_event.is_set():
        try:
            signal = signal_queue.get(timeout=0.5)
        except queue.Empty:
            continue

        # None is the shutdown sentinel. Honour it unconditionally so a
        # sentinel never gets mistaken for a "train now" request.
        if signal is None:
            break

        # Copy under the lock, train outside it, so the producer is not
        # blocked for the duration of a gradient step.
        with buffer_lock:
            buffer_snapshot = list(buffer)
        if not buffer_snapshot:
            continue

        loss = _train_step_adapters(actor, optimizers, buffer_snapshot, batch_size, gamma, device)
        training_losses.append(loss)
|
| 1375 |
+
|
| 1376 |
+
|
| 1377 |
+
def _train_step_adapters(actor, optimizers, buffer_data, batch_size, gamma, device):
    """Perform one training step on the adapters while the brains stay frozen.

    A random batch is drawn from ``buffer_data``; each state is pushed
    through ``actor.input_adapter``, the brain (``actor.brain`` if set,
    otherwise the mean over ``actor.cocoon.brains``) and
    ``actor.output_adapter``. The loss is the batch mean of
    ``-return * ||action||``.

    The brain weights are frozen by temporarily clearing ``requires_grad``
    on their parameters instead of wrapping the forward pass in
    ``torch.no_grad()``: ``no_grad`` would cut the graph and the input
    adapter would never receive a gradient.

    Args:
        actor: Object exposing ``input_adapter``, ``output_adapter``,
            ``brain`` and ``cocoon.brains``.
        optimizers: Optimizers to zero before and step after the backward
            pass.
        buffer_data: Iterable of experience dicts with ``'state'`` and
            ``'return'`` keys.
        batch_size: Maximum number of experiences sampled.
        gamma: Unused here (returns are precomputed); kept for signature
            compatibility.
        device: Torch device for the tensors built in this step.

    Returns:
        The scalar loss as a float, or ``0.0`` when there is no data or an
        adapter is missing.
    """
    import random

    data_source = list(buffer_data)
    if not data_source:
        return 0.0

    # Sample batch
    batch = random.sample(data_source, min(batch_size, len(data_source)))

    # Ensure adapters exist and are in training mode
    if actor.input_adapter is None or actor.output_adapter is None:
        return 0.0

    actor.input_adapter.train()
    actor.output_adapter.train()

    for opt in optimizers:
        opt.zero_grad()

    single_brain = bool(actor.brain)
    brains = [actor.brain] if single_brain else list(actor.cocoon.brains)

    # Freeze brain weights for this step but keep autograd enabled so the
    # gradient can travel back through the brain into the input adapter.
    frozen = [p for brain in brains for p in brain.parameters() if p.requires_grad]
    for p in frozen:
        p.requires_grad_(False)

    try:
        loss = torch.tensor(0.0, device=device, requires_grad=True)

        for exp in batch:
            state = torch.as_tensor(exp['state'], dtype=torch.float32).unsqueeze(0).to(device)
            ret = exp['return']

            adapted_state = actor.input_adapter(state)

            outputs = []
            for brain in brains:
                out = brain(adapted_state, return_language_logits=False)
                if isinstance(out, tuple):
                    out = out[0]
                outputs.append(out)

            if single_brain:
                brain_output = outputs[0]
            else:
                brain_output = torch.mean(torch.stack(outputs), dim=0)

            # Only the first four brain outputs feed the output adapter.
            action_tensor = actor.output_adapter(brain_output[:, :4])

            # Simple reward-weighted loss; negative because the optimizer
            # minimizes.
            step_loss = -ret * torch.norm(action_tensor)
            loss = loss + step_loss

        loss = loss / len(batch)
        loss.backward()
    finally:
        # Always restore the brains' trainability, even if the step failed.
        for p in frozen:
            p.requires_grad_(True)

    # Gradient clipping
    torch.nn.utils.clip_grad_norm_(actor.input_adapter.parameters(), 1.0)
    torch.nn.utils.clip_grad_norm_(actor.output_adapter.parameters(), 1.0)

    for opt in optimizers:
        opt.step()

    return loss.item()
|
| 1447 |
+
|
| 1448 |
+
|
| 1449 |
+
def _training_worker(stop_event, buffer, buffer_lock, signal_queue, brains, optimizers, batch_size, gamma, device, training_losses):
    """Background thread: waits for signals, then performs training steps.

    Blocks on ``signal_queue`` (polling every 0.5 s so ``stop_event`` is
    noticed). Each non-``None`` signal triggers one call to ``_train_step``
    on a snapshot of ``buffer``; the resulting loss is appended to
    ``training_losses``.

    Args:
        stop_event: Event that ends the loop once set.
        buffer: Shared experience container; only read here, under the lock.
        buffer_lock: Lock guarding ``buffer``.
        signal_queue: Queue of wake-up signals; ``None`` is the shutdown
            sentinel.
        brains: Brain modules passed through to the train step.
        optimizers: Optimizers passed through to the train step.
        batch_size: Batch size passed through to the train step.
        gamma: Discount factor passed through to the train step.
        device: Torch device passed through to the train step.
        training_losses: List that collects one loss value per step.
    """
    while not stop_event.is_set():
        try:
            signal = signal_queue.get(timeout=0.5)
        except queue.Empty:
            continue

        # None is the shutdown sentinel. Honour it unconditionally so a
        # sentinel never gets mistaken for a "train now" request.
        if signal is None:
            break

        # Copy under the lock, train outside it, so the producer is not
        # blocked for the duration of a gradient step.
        with buffer_lock:
            buffer_snapshot = list(buffer)
        if not buffer_snapshot:
            continue

        loss = _train_step(brains, optimizers, buffer_snapshot, batch_size, gamma, device)
        training_losses.append(loss)
|
| 1464 |
+
|
| 1465 |
+
|
| 1466 |
+
def _train_step(brains, optimizers, buffer_data, batch_size, gamma, device):
    """Perform one reward-weighted training step on every brain.

    A random batch is drawn from ``buffer_data`` and the same batch is used
    for each ``(brain, optimizer)`` pair. Per sample the loss term is
    ``-return * mean(log_softmax(first three outputs))``; the batch mean is
    back-propagated, gradients are clipped to norm 1.0 and the optimizer is
    stepped.

    Args:
        brains: Brain modules, called as
            ``brain(state, return_language_logits=False)``.
        optimizers: One optimizer per brain, paired by position.
        buffer_data: Iterable of experience dicts with ``'state'`` and
            ``'return'`` keys.
        batch_size: Maximum number of experiences sampled.
        gamma: Unused here (returns are precomputed); kept for signature
            compatibility.
        device: Torch device for the tensors built in this step.

    Returns:
        Mean loss over the brains that were actually trained, or ``0.0``
        when there is no data or no brain/optimizer pair.
    """
    import random

    data_source = list(buffer_data)
    if not data_source:
        return 0.0

    # Sample batch
    batch = random.sample(data_source, min(batch_size, len(data_source)))

    # The batch is identical for every brain, so build its tensors once.
    states = [
        torch.as_tensor(exp['state'], dtype=torch.float32).unsqueeze(0).to(device)
        for exp in batch
    ]
    returns = [exp['return'] for exp in batch]

    total_loss = 0.0
    trained = 0

    for brain, optimizer in zip(brains, optimizers):
        brain.train()
        optimizer.zero_grad()

        loss = torch.tensor(0.0, device=device)

        for state, ret in zip(states, returns):
            output = brain(state, return_language_logits=False)
            if isinstance(output, tuple):
                output = output[0]

            # Softmax log-probabilities over the first three outputs.
            log_probs = torch.log_softmax(output.flatten()[:3], dim=0)

            # Reward-weighted loss (negative because we minimize)
            loss = loss - (ret * log_probs.mean())

        loss = loss / len(batch)
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(brain.parameters(), 1.0)

        optimizer.step()
        total_loss += loss.item()
        trained += 1

    # Average over the pairs that really ran; this also avoids a
    # ZeroDivisionError when the ensemble is empty.
    if not trained:
        return 0.0
    return total_loss / trained
|
| 1512 |
+
|
| 1513 |
+
|
| 1514 |
+
def _save_trained_cocoon(agent, save_path, episode):
    """Save the updated cocoon with trained weights (best effort).

    If ``agent`` has an ``export_cocoon`` method, it is called with
    ``save_path``. Otherwise the ``state_dict()`` of every entry in
    ``agent.brains`` is pickled to ``<save_path minus .py>_weights.pkl``.

    Args:
        agent: Agent to persist.
        save_path: Target path, or ``None`` to use
            ``cocoon_trained_ep{episode}.py``.
        episode: Episode number, used only for the default file name.

    Any exception is caught and reported on stdout so that a failed save
    never interrupts the caller.
    """
    import os
    import pickle

    if save_path is None:
        save_path = f"cocoon_trained_ep{episode}.py"

    try:
        if hasattr(agent, 'export_cocoon'):
            agent.export_cocoon(save_path)
            print(f" 💾 Saved: {save_path}")
        else:
            # Fallback: save just the state dicts.
            # Strip only a trailing ".py"; str.replace would also rewrite
            # ".py" inside directory names and, with no ".py" at all, would
            # write the pickle over save_path itself.
            root, ext = os.path.splitext(save_path)
            if ext != '.py':
                root = save_path
            weights_path = f"{root}_weights.pkl"

            state_dicts = [brain.state_dict() for brain in agent.brains]
            with open(weights_path, 'wb') as f:
                pickle.dump(state_dicts, f)
            print(f" 💾 Saved weights: {weights_path}")
    except Exception as e:
        print(f" ⚠️ Save failed: {e}")
|
| 1532 |
+
|
| 1533 |
+
|
| 1534 |
+
# =============================================================================
|
| 1535 |
+
# MAIN
|
| 1536 |
+
# =============================================================================
|
| 1537 |
+
|
| 1538 |
+
def main():
    """Command-line entry point: diagnostics, driving, training, or usage help.

    Flow:
      1. Parse arguments; ``--doctor`` exits via ``_doctor``.
      2. Load a cocoon, either auto-detected or imported from ``--cocoon``.
      3. Load TMRL lazily and instantiate ``CocoonAgent``.
      4. With ``--drive`` and/or ``--train`` run ``drive_trackmania``;
         otherwise print usage and run a quick actor smoke test.

    Rebinds the module globals ``CocoonAgent`` and ``COCOON_AVAILABLE`` when
    a cocoon is loaded from an explicit path.
    """
    import argparse
    import glob
    import importlib.util

    global CocoonAgent, COCOON_AVAILABLE

    parser = argparse.ArgumentParser(description="🏎️ Cocoon TMRL Adapter - Drive TrackMania with your organisms")
    parser.add_argument('--drive', action='store_true', help='Start driving in TrackMania (inference only)')
    parser.add_argument('--train', action='store_true', help='Train while driving (organisms learn!)')
    parser.add_argument('--organism', type=int, default=0, help='Organism index (0=ensemble, 1+=specific)')
    parser.add_argument('--episodes', type=int, default=5, help='Number of episodes to run')
    parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate (training mode)')
    parser.add_argument('--cocoon', type=str, default=None, help='Path to cocoon.py file')
    parser.add_argument('--save', type=str, default=None, help='Path to save trained cocoon')
    parser.add_argument('--track-time', type=float, default=60.0, help='Expected track completion time in seconds (default: 60)')
    parser.add_argument('--urgency-alpha', type=float, default=2.0, help='Urgency exponential steepness (default: 2.0)')
    parser.add_argument('--doctor', action='store_true', help='Run setup diagnostics (does not launch TrackMania)')
    args = parser.parse_args()

    print("🏎️ COCOON TMRL ADAPTER")
    print("=" * 50)
    print()

    if args.doctor:
        raise SystemExit(_doctor(args.cocoon))

    # If no explicit cocoon path was passed, try to auto-detect exports in the CWD.
    if not args.cocoon:
        _try_load_cocoon(quiet=True, scan_exports=True)

    save_path = args.save
    cocoon_dir = None
    cocoon_name = None

    # Try to load cocoon from specified path
    if args.cocoon:
        cocoon_path = os.path.abspath(args.cocoon)
        cocoon_dir = os.path.dirname(cocoon_path)
        # Strip only a trailing ".py"; str.replace would also remove ".py"
        # occurring in the middle of the file name.
        base_name = os.path.basename(cocoon_path)
        stem, ext = os.path.splitext(base_name)
        cocoon_name = stem if ext == '.py' else base_name

        if not os.path.isfile(cocoon_path):
            print(f"❌ Cocoon file not found: {cocoon_path}")
            if os.path.isdir(cocoon_dir):
                candidates = sorted(glob.glob(os.path.join(cocoon_dir, 'cocoon_*.py')))
                if candidates:
                    print(" Found these nearby:")
                    for c in candidates[:10]:
                        print(f" - {os.path.basename(c)}")
            _print_basic_setup_instructions()
            return

        try:
            print("⏳ Loading cocoon (this may take a moment for large files)...")
            spec = importlib.util.spec_from_file_location("_cocoon_from_cli", cocoon_path)
            if spec is None or spec.loader is None:
                raise RuntimeError("Could not create import spec")
            cocoon_module = importlib.util.module_from_spec(spec)
            # Register before executing so the module can be found by name
            # while its own top-level code runs.
            sys.modules[spec.name] = cocoon_module
            spec.loader.exec_module(cocoon_module)
            _ensure_json_default(cocoon_module)
            CocoonAgent = cocoon_module.CocoonAgent
            COCOON_AVAILABLE = True
            print(f"✅ Loaded cocoon from: {cocoon_path}")
        except Exception as e:
            print(f"❌ Failed to load {cocoon_path}: {e}")
            import traceback
            traceback.print_exc()
            return

    if save_path is None and cocoon_dir and cocoon_name:
        save_path = os.path.join(cocoon_dir, f"{cocoon_name}_trained.py")
        print(f"💾 Training outputs will be saved to: {save_path}")

    if not COCOON_AVAILABLE:
        print("❌ No cocoon found!")
        print()
        print("SETUP OPTIONS:")
        print()
        print(" 1. Put cocoon.py in the same folder as this script")
        print(" 2. Use --cocoon path/to/your/cocoon_ensemble_*.py")
        print(" 3. Rename your export to cocoon.py")
        print(" 4. Run: python cocoon_tmrl_adapter.py --doctor --cocoon path/to/cocoon.py")
        print()
        return

    # Lazy load TMRL after cocoon is ready
    print("⏳ Loading TMRL (TrackMania interface)...")
    if not _lazy_load_tmrl():
        print("❌ TMRL not available!")
        print(" Run: pip install tmrl")
        print()
        return

    print("✅ Cocoon found")
    print("✅ TMRL available")
    print()

    # Load cocoon
    agent = CocoonAgent()
    print(f"🧠 Loaded cocoon with {len(agent.brains)} organism brains")
    print()

    if args.drive or args.train:
        if args.drive:
            print("🏎️ Starting TrackMania driver...")
            if args.train:
                print(" Training ENABLED: gradients update on a background thread")
            else:
                print(" Mode: inference-only")
        else:
            # Back-compat: allow training without explicit --drive flag
            print("🧠 TrackMania TRAINING (drive loop shared)...")
        print(" Make sure TrackMania 2020 is running with OpenPlanet!")
        print()

        # Both modes share one drive loop; training is on exactly when
        # --train was given (always true on the train-only path).
        drive_trackmania(
            cocoon_agent=agent,
            organism_idx=args.organism,
            episodes=args.episodes,
            enable_training=args.train,
            learning_rate=args.lr,
            save_path=save_path,
            track_time=args.track_time,
            urgency_alpha=args.urgency_alpha
        )
    else:
        # Just show usage
        usage_lines = (
            "USAGE:",
            "",
            " # INFERENCE - Just drive (no learning):",
            " python cocoon_tmrl_adapter.py --drive",
            " python cocoon_tmrl_adapter.py --drive --organism 3 --episodes 10",
            "",
            " # TRAINING - Organisms learn while racing!",
            " python cocoon_tmrl_adapter.py --train --episodes 100",
            " python cocoon_tmrl_adapter.py --train --organism 1 --lr 0.0001 --save trained.py",
            "",
            " # URGENCY TUNING - Teach time pressure:",
            " python cocoon_tmrl_adapter.py --train --track-time 45 --urgency-alpha 2.5",
            "",
            " # With explicit cocoon path:",
            " python cocoon_tmrl_adapter.py --train --cocoon path/to/cocoon.py",
            "",
            " # In Python:",
            " from cocoon_tmrl_adapter import train_in_trackmania",
            " results = train_in_trackmania(organism_idx=1, episodes=100)",
            "",
        )
        for line in usage_lines:
            print(line)

        # Quick test
        print("Quick test - creating actor module...")
        try:
            import gymnasium as gym
            dummy_obs_space = gym.spaces.Box(low=-1, high=1, shape=(28,), dtype=np.float32)
            dummy_act_space = gym.spaces.Box(low=np.array([0, 0, -1]), high=np.array([1, 1, 1]), dtype=np.float32)

            actor = CocoonActorModule(
                observation_space=dummy_obs_space,
                action_space=dummy_act_space,
                cocoon_agent=agent,
                # --organism defaults to 0; the smoke test falls back to 1.
                organism_idx=args.organism or 1
            )

            # Test action
            dummy_obs = np.random.randn(28).astype(np.float32)
            action = actor.act(dummy_obs, test=True)

            print("✅ Actor test passed!")
            print(f" Input: {dummy_obs.shape} observation")
            print(f" Output: {action} (gas, brake, steer)")

        except Exception as e:
            print(f"⚠️ Actor test failed: {e}")
|
| 1721 |
+
|
| 1722 |
+
|
| 1723 |
+
if __name__ == "__main__":
|
| 1724 |
+
main()
|
UNPACK/curriculum/connector_words.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "1.0",
|
| 3 |
+
"stage": 1,
|
| 4 |
+
"name": "connector_seed",
|
| 5 |
+
"purpose": "Seed closed-class words before open-ended chat.",
|
| 6 |
+
"seed_words": [
|
| 7 |
+
"a",
|
| 8 |
+
"an",
|
| 9 |
+
"the",
|
| 10 |
+
"and",
|
| 11 |
+
"to",
|
| 12 |
+
"of",
|
| 13 |
+
"in",
|
| 14 |
+
"it",
|
| 15 |
+
"is",
|
| 16 |
+
"are",
|
| 17 |
+
"you",
|
| 18 |
+
"me",
|
| 19 |
+
"we",
|
| 20 |
+
"because",
|
| 21 |
+
"then"
|
| 22 |
+
],
|
| 23 |
+
"extended_words": [
|
| 24 |
+
"a",
|
| 25 |
+
"an",
|
| 26 |
+
"and",
|
| 27 |
+
"are",
|
| 28 |
+
"as",
|
| 29 |
+
"at",
|
| 30 |
+
"be",
|
| 31 |
+
"because",
|
| 32 |
+
"but",
|
| 33 |
+
"by",
|
| 34 |
+
"for",
|
| 35 |
+
"from",
|
| 36 |
+
"if",
|
| 37 |
+
"in",
|
| 38 |
+
"is",
|
| 39 |
+
"it",
|
| 40 |
+
"of",
|
| 41 |
+
"on",
|
| 42 |
+
"or",
|
| 43 |
+
"so",
|
| 44 |
+
"that",
|
| 45 |
+
"the",
|
| 46 |
+
"then",
|
| 47 |
+
"this",
|
| 48 |
+
"to",
|
| 49 |
+
"was",
|
| 50 |
+
"we",
|
| 51 |
+
"when",
|
| 52 |
+
"with",
|
| 53 |
+
"you"
|
| 54 |
+
],
|
| 55 |
+
"accept_single_character_connectors": true,
|
| 56 |
+
"lesson_shape": {
|
| 57 |
+
"speaker": "outside_coach",
|
| 58 |
+
"target": "cocoon_vocabulary",
|
| 59 |
+
"reward": "positive for adding connector words without output pressure"
|
| 60 |
+
}
|
| 61 |
+
}
|
UNPACK/curriculum/dialogue_frames.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "1.0",
|
| 3 |
+
"stages": [
|
| 4 |
+
{
|
| 5 |
+
"stage": 2,
|
| 6 |
+
"name": "echo_game",
|
| 7 |
+
"objective": "Exact short phrase copy before semantic conversation.",
|
| 8 |
+
"examples": [
|
| 9 |
+
{
|
| 10 |
+
"input": "I am clone",
|
| 11 |
+
"target": "I am clone"
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"input": "we are here",
|
| 15 |
+
"target": "we are here"
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"input": "the ball is near",
|
| 19 |
+
"target": "the ball is near"
|
| 20 |
+
}
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"stage": 4,
|
| 25 |
+
"name": "turn_exchange_game",
|
| 26 |
+
"objective": "Tiny two-turn exchanges with stable role words.",
|
| 27 |
+
"examples": [
|
| 28 |
+
{
|
| 29 |
+
"input": "hello",
|
| 30 |
+
"target": "hello"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"input": "I see you",
|
| 34 |
+
"target": "you see me"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"input": "we move then rest",
|
| 38 |
+
"target": "we move then rest"
|
| 39 |
+
}
|
| 40 |
+
]
|
| 41 |
+
}
|
| 42 |
+
],
|
| 43 |
+
"constraints": {
|
| 44 |
+
"max_words_initial": 5,
|
| 45 |
+
"avoid_tool_prompts": true,
|
| 46 |
+
"avoid_json_syntax": true
|
| 47 |
+
}
|
| 48 |
+
}
|
UNPACK/curriculum/game_language_tasks.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "1.0",
|
| 3 |
+
"stage": 5,
|
| 4 |
+
"name": "game_language_binding",
|
| 5 |
+
"objective": "Attach language to RL observations, actions, and reward.",
|
| 6 |
+
"bindings": [
|
| 7 |
+
{
|
| 8 |
+
"state_hint": "ball distance small",
|
| 9 |
+
"phrase": "ball near",
|
| 10 |
+
"preferred_action_words": [
|
| 11 |
+
"catch",
|
| 12 |
+
"move"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"state_hint": "ball x negative",
|
| 17 |
+
"phrase": "move left",
|
| 18 |
+
"preferred_action_words": [
|
| 19 |
+
"left"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"state_hint": "ball x positive",
|
| 24 |
+
"phrase": "move right",
|
| 25 |
+
"preferred_action_words": [
|
| 26 |
+
"right"
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"state_hint": "positive reward",
|
| 31 |
+
"phrase": "catch good",
|
| 32 |
+
"reward_target": 1.0
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"state_hint": "negative reward",
|
| 36 |
+
"phrase": "miss bad",
|
| 37 |
+
"reward_target": -1.0
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"state_hint": "high rolling reward",
|
| 41 |
+
"phrase": "reward high",
|
| 42 |
+
"reward_target": 0.8
|
| 43 |
+
}
|
| 44 |
+
],
|
| 45 |
+
"arena": {
|
| 46 |
+
"stage": 6,
|
| 47 |
+
"name": "clone_dialogue_arena",
|
| 48 |
+
"instruction": "Run original vs clone with one target per exchange; score each turn before training."
|
| 49 |
+
}
|
| 50 |
+
}
|
UNPACK/curriculum/reward_rubric.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "1.0",
|
| 3 |
+
"coach_role": "reward_judge_not_speaker",
|
| 4 |
+
"positive_rewards": {
|
| 5 |
+
"connector_use": 0.15,
|
| 6 |
+
"short_response_compliance": 0.2,
|
| 7 |
+
"exact_phrase_copy_early": 0.4,
|
| 8 |
+
"role_preservation": 0.35,
|
| 9 |
+
"game_state_word_alignment": 0.3
|
| 10 |
+
},
|
| 11 |
+
"penalties": {
|
| 12 |
+
"repetition_loop": -0.5,
|
| 13 |
+
"malformed_prompt_residue": -0.6,
|
| 14 |
+
"json_or_tool_syntax_when_not_training_tools": -0.6,
|
| 15 |
+
"long_unrequested_response": -0.2,
|
| 16 |
+
"role_confusion": -0.35
|
| 17 |
+
},
|
| 18 |
+
"blocked_residue_examples": [
|
| 19 |
+
"toolcocoonlistargs",
|
| 20 |
+
"functioncall",
|
| 21 |
+
"assistantto=functions",
|
| 22 |
+
"jsonschema",
|
| 23 |
+
"systemprompt"
|
| 24 |
+
]
|
| 25 |
+
}
|
UNPACK/curriculum/role_transform_tasks.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "1.0",
|
| 3 |
+
"stage": 3,
|
| 4 |
+
"name": "role_transform_game",
|
| 5 |
+
"objective": "Preserve speaker/listener perspective across short replies.",
|
| 6 |
+
"transforms": [
|
| 7 |
+
{
|
| 8 |
+
"input": "I am original",
|
| 9 |
+
"target": "you are original"
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"input": "I am clone",
|
| 13 |
+
"target": "you are clone"
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"input": "you see me",
|
| 17 |
+
"target": "I see you"
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"input": "we are together",
|
| 21 |
+
"target": "we are together"
|
| 22 |
+
}
|
| 23 |
+
],
|
| 24 |
+
"score_fields": [
|
| 25 |
+
"exact_match",
|
| 26 |
+
"pronoun_role_preserved",
|
| 27 |
+
"short_response"
|
| 28 |
+
]
|
| 29 |
+
}
|
UNPACK/jsbsim_quadcopter.py
ADDED
|
@@ -0,0 +1,1141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
🚁 QUADCOPTER FLIGHT DYNAMICS MODEL
|
| 3 |
+
|
| 4 |
+
Production-grade physics for sim-to-real drone applications.
|
| 5 |
+
Aligned with PX4/ArduPilot SITL and real hardware measurements.
|
| 6 |
+
|
| 7 |
+
REAL-WORLD ALIGNMENT:
|
| 8 |
+
✅ Motor dynamics with first-order lag (30ms time constant)
|
| 9 |
+
✅ Thrust coefficient from T-Motor dyno data (k_t = 1.03e-6)
|
| 10 |
+
✅ Inertia tensor from IEEE quadrotor identification papers
|
| 11 |
+
✅ ISA atmosphere model (density vs altitude)
|
| 12 |
+
✅ Cheeseman-Bennett ground effect model
|
| 13 |
+
✅ Rotor drag (H-force) during translation
|
| 14 |
+
✅ Sensor noise injection for sim-to-real training
|
| 15 |
+
✅ Battery model with voltage sag under load
|
| 16 |
+
|
| 17 |
+
REFERENCE PLATFORMS:
|
| 18 |
+
- DJI F450 (hobby/research)
|
| 19 |
+
- Holybro X500 (PX4 development)
|
| 20 |
+
- 5" racing quad (Betaflight)
|
| 21 |
+
|
| 22 |
+
SIM-TO-REAL NOTES:
|
| 23 |
+
- Enable sensor noise during training
|
| 24 |
+
- Use domain randomization on mass, inertia
|
| 25 |
+
- Real ESCs have additional latency (~10ms)
|
| 26 |
+
- PID gains will need on-hardware tuning
|
| 27 |
+
|
| 28 |
+
Architecture:
|
| 29 |
+
QuadcopterConfig - Hardware parameters (editable for your platform)
|
| 30 |
+
QuadcopterState - Full 13-DOF state vector
|
| 31 |
+
QuadcopterFDM - Flight dynamics model
|
| 32 |
+
QuadcopterEnv - Gymnasium RL environment wrapper
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
import numpy as np
|
| 36 |
+
import logging
|
| 37 |
+
from dataclasses import dataclass, field
|
| 38 |
+
from typing import Dict, Any, Optional, Tuple, List
|
| 39 |
+
import gymnasium as gym
|
| 40 |
+
from gymnasium import spaces
|
| 41 |
+
|
| 42 |
+
logger = logging.getLogger(__name__)
|
| 43 |
+
|
| 44 |
+
# Check for JSBSim availability (optional high-fidelity backend)
|
| 45 |
+
JSBSIM_AVAILABLE = False
|
| 46 |
+
try:
|
| 47 |
+
import jsbsim
|
| 48 |
+
JSBSIM_AVAILABLE = True
|
| 49 |
+
logger.info("✅ JSBSim flight dynamics available")
|
| 50 |
+
except ImportError:
|
| 51 |
+
logger.debug("JSBSim not installed (optional). Using built-in physics.")
|
| 52 |
+
jsbsim = None
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@dataclass
class QuadcopterConfig:
    """
    Tunable hardware description of the simulated quadcopter.

    Every field has a default, so ``QuadcopterConfig()`` yields a complete
    parameter set. According to the original author's notes the defaults are
    modelled on a DJI F450 / Holybro X500 class frame and a 5" racing quad,
    with numbers taken from datasheets and flight logs. Override individual
    fields to describe a different platform.

    Units are SI unless a field name or comment says otherwise.
    """

    # ------------------------------------------------------------------
    # Airframe
    # ------------------------------------------------------------------
    mass: float = 1.5          # kg, all-up weight (frame + battery + camera)
    arm_length: float = 0.225  # m, hub-to-motor distance (450 mm diagonal / 2)

    # Principal moments of inertia, kg*m^2 (X-configuration).
    # Original note cites "System Identification of a Quadrotor Micro Air
    # Vehicle" (IEEE) and PX4 SITL defaults as the source.
    Ixx: float = 0.0142  # about the roll axis
    Iyy: float = 0.0142  # about the pitch axis (same as Ixx: symmetric frame)
    Izz: float = 0.0225  # about the yaw axis

    # ------------------------------------------------------------------
    # Propulsion (original note: T-Motor F40 Pro with 5x4.5 props)
    # ------------------------------------------------------------------
    # Static thrust ceiling of one motor: ~900 g -> 900 * 9.81 / 1000 N.
    max_thrust_per_motor: float = 8.83  # N

    # Quadratic thrust law T = k_t * omega^2.
    # 8.83 N at 28000 RPM (2932 rad/s) gives k_t ~= 1.03e-6.
    thrust_coefficient: float = 1.03e-6  # N / (rad/s)^2

    # Quadratic reaction-torque law tau = k_q * omega^2.
    # k_q / k_t is quoted as ~0.013-0.016 for 5" props.
    torque_coefficient: float = 1.5e-8  # N*m / (rad/s)^2

    # First-order lag between commanded and actual rotor speed.
    motor_time_constant: float = 0.03  # s (30 ms)

    # Rotor speed range that the 0..1 throttle command is mapped onto.
    min_rpm: float = 1_000.0   # idle
    max_rpm: float = 28_000.0  # full throttle

    # ------------------------------------------------------------------
    # Aerodynamics
    # ------------------------------------------------------------------
    drag_coefficient: float = 0.47     # dimensionless, sphere-like bluff body
    cross_section_area: float = 0.035  # m^2, reference area for body drag

    # Scale factor of the rotor H-force (blade-flapping drag) in translation.
    rotor_drag_coefficient: float = 0.0085  # empirical

    # ------------------------------------------------------------------
    # Flight envelope limits
    # ------------------------------------------------------------------
    max_velocity: float = 20.0     # m/s (72 km/h)
    max_angular_rate: float = 8.0  # rad/s (~460 deg/s)
    max_tilt_angle: float = 0.61   # rad (35 deg)

    # ------------------------------------------------------------------
    # Battery (4S 1500 mAh LiPo)
    # ------------------------------------------------------------------
    battery_voltage_full: float = 16.8    # V, 4 cells * 4.2 V
    battery_voltage_empty: float = 13.2   # V, 4 cells * 3.3 V
    battery_capacity_mah: float = 1_500.0  # mAh
    battery_internal_resistance: float = 0.02  # ohm

    # Per-motor electrical power as a function of thrust T:
    #   P = power_k1 * T + power_k2 * T^2
    power_k1: float = 8.0       # W / N
    power_k2: float = 0.5       # W / N^2
    hover_power: float = 180.0  # W, quoted hover draw

    # ------------------------------------------------------------------
    # Sensor noise (1-sigma), used for sim-to-real robustness
    # ------------------------------------------------------------------
    accel_noise_std: float = 0.1      # m/s^2
    gyro_noise_std: float = 0.01      # rad/s
    position_noise_std: float = 0.02  # m
    velocity_noise_std: float = 0.05  # m/s

    # ------------------------------------------------------------------
    # System latencies
    # ------------------------------------------------------------------
    sensor_latency: float = 0.004   # s, IMU -> flight controller
    actuator_latency: float = 0.01  # s, flight controller -> ESC -> motor
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
@dataclass
class QuadcopterState:
    """
    Mutable kinematic + battery state of one quadcopter.

    Frame conventions as actually used by the dynamics code in this file:
    the world z-axis points UP (``z`` is altitude, ground is ``z == 0``) and
    the body z-axis is the thrust direction. Linear velocity and angular
    rates are expressed in the body frame.

    ``to_array`` / ``from_array`` (de)serialise the 12 rigid-body states plus
    the battery level; ``motor_speeds`` is deliberately not part of that
    13-element vector.
    """
    # World-frame position, meters.
    x: float = 0.0
    y: float = 0.0
    z: float = 0.0  # altitude, positive = up

    # Body-frame linear velocity, m/s.
    u: float = 0.0  # along body x (forward)
    v: float = 0.0  # along body y (original note said "right"; with z up a
                    # right-handed frame puts +y to the left - TODO confirm)
    w: float = 0.0  # along body z, i.e. the thrust direction (up when level)

    # Euler angles, radians (ZYX yaw-pitch-roll sequence).
    phi: float = 0.0    # roll
    theta: float = 0.0  # pitch
    psi: float = 0.0    # yaw

    # Body-frame angular rates, rad/s.
    p: float = 0.0  # roll rate
    q: float = 0.0  # pitch rate
    r: float = 0.0  # yaw rate

    # Rotor speeds of the four motors, rad/s. A default_factory is required
    # so that instances never share one array.
    motor_speeds: np.ndarray = field(default_factory=lambda: np.zeros(4))

    # Remaining battery charge as a fraction in [0, 1].
    battery_remaining: float = 1.0

    def to_array(self) -> np.ndarray:
        """
        Pack the state into a flat observation vector.

        Returns:
            float32 array of shape (13,):
            ``[x, y, z, u, v, w, phi, theta, psi, p, q, r, battery_remaining]``.
        """
        return np.array([
            self.x, self.y, self.z,
            self.u, self.v, self.w,
            self.phi, self.theta, self.psi,
            self.p, self.q, self.r,
            self.battery_remaining,
        ], dtype=np.float32)

    @classmethod
    def from_array(cls, arr: np.ndarray) -> 'QuadcopterState':
        """
        Build a state from a vector laid out as produced by ``to_array``.

        Args:
            arr: Sequence with at least 12 elements. Element 12 (battery
                fraction) is optional and defaults to 1.0 when absent.

        Returns:
            A new ``QuadcopterState`` with ``motor_speeds`` at its default.

        Raises:
            IndexError: If ``arr`` has fewer than 12 elements.
        """
        # Convert to plain Python floats so the state never carries narrow
        # numpy scalars (e.g. float32 from an observation buffer), which
        # would otherwise leak reduced precision into later arithmetic.
        (x, y, z, u, v, w, phi, theta, psi, p, q, r) = (
            float(arr[i]) for i in range(12)
        )
        battery = float(arr[12]) if len(arr) > 12 else 1.0
        return cls(
            x=x, y=y, z=z,
            u=u, v=v, w=w,
            phi=phi, theta=theta, psi=psi,
            p=p, q=q, r=r,
            battery_remaining=battery,
        )
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
class QuadcopterFDM:
    """
    Rigid-body flight dynamics model for an X-configuration quadcopter.

    Each call to ``step`` applies, in order: first-order motor lag, rotor
    thrust/torque, optional ground effect, gravity, body drag, rotor drag
    (H-force), wind, then integrates the body-frame velocities, angular
    rates, world position, Euler angles and battery level.

    Conventions used throughout this class:
      * world z-axis points up; the ground plane is ``z == 0``;
      * thrust acts along the body +z axis;
      * linear velocity (u, v, w) and rates (p, q, r) are body-frame;
      * attitude is ZYX Euler (yaw-pitch-roll).

    Random disturbances (turbulence, sensor noise) draw from the global
    ``np.random`` state.
    """

    GRAVITY = 9.80665  # m/s² (standard gravity g0)
    AIR_DENSITY_SEA_LEVEL = 1.225  # kg/m³ at sea level, 15°C

    def __init__(self, config: Optional["QuadcopterConfig"] = None, use_jsbsim: bool = False):
        """
        Args:
            config: Quadcopter physical parameters; a default
                ``QuadcopterConfig`` is created when omitted.
            use_jsbsim: Request the JSBSim backend. Only honoured when the
                ``jsbsim`` package could be imported.
        """
        self.config = config or QuadcopterConfig()
        self.state = QuadcopterState()
        self.use_jsbsim = use_jsbsim and JSBSIM_AVAILABLE

        # Wind model: mean wind [wx, wy, wz] in the world frame + gust level.
        self.wind_velocity = np.zeros(3)
        self.turbulence_intensity = 0.0

        # Rotor speeds (rad/s): what the motors are doing vs. what was asked.
        self._motor_speeds_actual = np.zeros(4)
        self._motor_speeds_commanded = np.zeros(4)

        # Feature switches.
        self.enable_sensor_noise = True   # used by get_noisy_observation()
        self.enable_ground_effect = True  # used by step()

        # Handle to the JSBSim executive (currently never populated).
        self.fdm = None
        if self.use_jsbsim:
            self._init_jsbsim()

        logger.debug(f"QuadcopterFDM initialized (JSBSim={self.use_jsbsim})")

    def _init_jsbsim(self):
        """
        Placeholder for JSBSim initialisation.

        No JSBSim executive is created yet (aircraft definition files would
        be needed); ``self.fdm`` stays ``None`` and the built-in physics in
        ``step`` is always used.
        """
        if not JSBSIM_AVAILABLE:
            return

        try:
            # Real setup would be: self.fdm = jsbsim.FGFDMExec('.')
            self.fdm = None
            logger.info("JSBSim FDM ready for quadcopter simulation")
        except Exception as e:
            logger.warning(f"JSBSim init failed: {e}, using simplified physics")
            self.use_jsbsim = False

    def get_air_density(self, altitude: float) -> float:
        """
        Air density from the International Standard Atmosphere (troposphere).

        Args:
            altitude: Height above sea level in meters. Values are clamped to
                the model's valid range [0, 11000].

        Returns:
            Air density in kg/m³.
        """
        T0 = 288.15  # sea-level temperature (K)
        L = 0.0065   # temperature lapse rate (K/m)
        R = 287.05   # specific gas constant of dry air (J/kg·K)

        altitude = min(max(altitude, 0), 11000)

        T = T0 - L * altitude
        # rho/rho0 = (T/T0)^(g/(L*R) - 1)
        return self.AIR_DENSITY_SEA_LEVEL * (T / T0) ** (self.GRAVITY / (L * R) - 1)

    def set_wind(self, velocity: np.ndarray, turbulence: float = 0.0):
        """
        Set wind conditions.

        Args:
            velocity: Mean wind velocity [wx, wy, wz] in m/s (world frame).
            turbulence: Turbulence intensity; clipped to [0, 1].
        """
        self.wind_velocity = np.array(velocity, dtype=np.float32)
        self.turbulence_intensity = np.clip(turbulence, 0, 1)

    def reset(self, position: Optional[np.ndarray] = None,
              orientation: Optional[np.ndarray] = None):
        """
        Reset the vehicle to rest with stopped motors and a full battery.

        Wind settings are left untouched.

        Args:
            position: Optional initial [x, y, z] in meters.
            orientation: Optional initial [phi, theta, psi] in radians.
        """
        self.state = QuadcopterState()
        self._motor_speeds_actual = np.zeros(4)
        self._motor_speeds_commanded = np.zeros(4)

        if position is not None:
            self.state.x, self.state.y, self.state.z = position

        if orientation is not None:
            self.state.phi, self.state.theta, self.state.psi = orientation

    def step(self, motor_commands: np.ndarray, dt: float = 0.01) -> "QuadcopterState":
        """
        Advance the physics by one timestep.

        Args:
            motor_commands: [m1, m2, m3, m4] throttle commands; clipped to
                [0, 1] and mapped linearly onto [min_rpm, max_rpm].
            dt: Timestep in seconds.

        Returns:
            The updated state object (the same instance as ``self.state``).
        """
        cfg = self.config
        state = self.state

        motor_commands = np.clip(motor_commands, 0, 1)

        # === MOTOR DYNAMICS (first-order lag) ===
        # τ·ω̇ + ω = ω_cmd, discretised as ω += α·(ω_cmd − ω), α = dt/(τ+dt).
        rpm_to_rad = 2 * np.pi / 60
        min_omega = cfg.min_rpm * rpm_to_rad
        max_omega = cfg.max_rpm * rpm_to_rad
        self._motor_speeds_commanded = min_omega + motor_commands * (max_omega - min_omega)

        alpha = dt / (cfg.motor_time_constant + dt)
        self._motor_speeds_actual += alpha * (self._motor_speeds_commanded - self._motor_speeds_actual)

        # Expose the rotor speeds through the state.
        state.motor_speeds = self._motor_speeds_actual.copy()

        # === FORCES AND TORQUES (body frame) ===
        thrust, torques = self._calculate_motor_forces()

        if self.enable_ground_effect and state.z < 1.0:
            # Cheeseman-Bennett: T_ge / T = 1 / (1 − (r / 4z)²), r = rotor radius.
            rotor_radius = 0.127  # 5" prop
            z_eff = max(state.z, 0.1)  # keep the denominator well away from 0
            ge_factor = 1.0 / (1.0 - (rotor_radius / (4 * z_eff)) ** 2)
            thrust *= np.clip(ge_factor, 1.0, 1.5)  # cap at +50 %

        gravity_body = self._rotate_to_body(np.array([0, 0, -self.GRAVITY * cfg.mass]))
        drag = self._calculate_drag()
        rotor_drag = self._calculate_rotor_drag()
        wind_force = self._calculate_wind_force()

        total_force = thrust + gravity_body + drag + rotor_drag + wind_force

        omega = np.array([state.p, state.q, state.r])
        vel_prev = np.array([state.u, state.v, state.w])

        # Newton's law in the rotating body frame: v̇ = F/m − ω × v.
        # Without the transport term a yawing vehicle would drag its
        # world-frame velocity around with its heading.
        accel = total_force / cfg.mass - np.cross(omega, vel_prev)

        # === ANGULAR DYNAMICS ===
        # Euler's equation: I·ω̇ = τ − ω × (I·ω)
        I = np.diag([cfg.Ixx, cfg.Iyy, cfg.Izz])
        gyro_term = np.cross(omega, I @ omega)
        angular_accel = np.linalg.solve(I, torques - gyro_term)

        # === INTEGRATION (semi-implicit Euler: rates first, then poses) ===
        state.u += accel[0] * dt
        state.v += accel[1] * dt
        state.w += accel[2] * dt

        state.p += angular_accel[0] * dt
        state.q += angular_accel[1] * dt
        state.r += angular_accel[2] * dt

        # Enforce the flight envelope: scale speed, clip each body rate.
        vel_body = np.array([state.u, state.v, state.w])
        vel_mag = np.linalg.norm(vel_body)
        if vel_mag > cfg.max_velocity:
            vel_body = vel_body * cfg.max_velocity / vel_mag
            state.u, state.v, state.w = vel_body

        rate_limit = cfg.max_angular_rate
        state.p = np.clip(state.p, -rate_limit, rate_limit)
        state.q = np.clip(state.q, -rate_limit, rate_limit)
        state.r = np.clip(state.r, -rate_limit, rate_limit)

        # Position: body velocity rotated into the world frame.
        vel_world = self._rotate_to_world(vel_body)
        state.x += vel_world[0] * dt
        state.y += vel_world[1] * dt
        state.z += vel_world[2] * dt

        # Attitude: full Euler-angle kinematics (valid away from θ = ±90°).
        c_phi = np.cos(state.phi)
        s_phi = np.sin(state.phi)
        c_theta = np.cos(state.theta)
        t_theta = np.tan(state.theta)

        if abs(c_theta) > 1e-6:  # skip the update at the gimbal-lock singularity
            phi_dot = state.p + s_phi * t_theta * state.q + c_phi * t_theta * state.r
            theta_dot = c_phi * state.q - s_phi * state.r
            psi_dot = s_phi / c_theta * state.q + c_phi / c_theta * state.r
            state.phi += phi_dot * dt
            state.theta += theta_dot * dt
            state.psi += psi_dot * dt

        state.phi = self._wrap_angle(state.phi)
        state.theta = self._wrap_angle(state.theta)
        state.psi = self._wrap_angle(state.psi)

        # Ground collision: clamp to the ground plane and stop sinking.
        if state.z < 0:
            state.z = 0
            state.w = max(0, state.w)

        # Battery drain. Capacity in Wh uses a nominal 15 V pack voltage
        # (1500 mAh -> ~22.5 Wh); × 3600 converts Wh to W·s.
        power = self._calculate_power_consumption(motor_commands)
        battery_wh = cfg.battery_capacity_mah * 15.0 / 1000.0
        state.battery_remaining -= power * dt / (battery_wh * 3600)
        state.battery_remaining = max(0, state.battery_remaining)

        return state

    def _calculate_motor_forces(self) -> Tuple[np.ndarray, np.ndarray]:
        r"""
        Calculate net thrust and body torques from the current motor speeds.

        Motor layout (X-config):
            1 (CCW)    2 (CW)
                \      /
                 \    /
                  [+]
                 /    \
                /      \
            4 (CW)    3 (CCW)

        Returns:
            ``(thrust, torques)``: thrust is ``[0, 0, T]`` in the body frame;
            torques is ``[tau_phi, tau_theta, tau_psi]`` (roll, pitch, yaw).
        """
        speeds_sq = self.state.motor_speeds ** 2

        # Per-motor thrust T_i = k_t·ω_i², limited to the motor's ceiling.
        thrusts = np.clip(self.config.thrust_coefficient * speeds_sq,
                          0, self.config.max_thrust_per_motor)

        total_thrust = np.array([0, 0, np.sum(thrusts)])

        # In an X layout each motor sits arm_length/√2 from either axis.
        lever = self.config.arm_length / np.sqrt(2)

        # Roll torque (about body x): motors 1,4 against motors 2,3.
        tau_phi = self.config.arm_length * (thrusts[0] + thrusts[3] - thrusts[1] - thrusts[2]) / np.sqrt(2)

        # Pitch torque (about body y): motors 1,2 against motors 3,4.
        # NOTE(review): sign convention depends on the intended body-frame
        # handedness, which this file does not state consistently - confirm.
        tau_theta = self.config.arm_length * (thrusts[0] + thrusts[1] - thrusts[2] - thrusts[3]) / np.sqrt(2)
        del lever  # documented above for readers; torques keep the original arithmetic

        # Yaw torque (about body z): rotor reaction torque, CCW minus CW.
        reaction_torques = self.config.torque_coefficient * speeds_sq
        tau_psi = (reaction_torques[0] + reaction_torques[2] -
                   reaction_torques[1] - reaction_torques[3])

        torques = np.array([tau_phi, tau_theta, tau_psi])

        return total_thrust, torques

    def _calculate_drag(self) -> np.ndarray:
        """
        Quadratic body drag in the body frame, D = ½·ρ·|v|²·Cd·A, opposing
        the body-frame velocity. Returns zeros below 0.01 m/s.
        """
        vel_body = np.array([self.state.u, self.state.v, self.state.w])
        vel_mag = np.linalg.norm(vel_body)

        if vel_mag < 0.01:
            return np.zeros(3)

        rho = self.get_air_density(self.state.z)
        drag_mag = (0.5 * rho * vel_mag ** 2 *
                    self.config.drag_coefficient * self.config.cross_section_area)

        return -drag_mag * vel_body / vel_mag

    def _calculate_rotor_drag(self) -> np.ndarray:
        """
        Rotor drag (H-force) during translation.

        Modelled as ``k_d · |v_xy| · Ω_avg`` acting against the body-frame
        x/y velocity, where Ω_avg is the mean rotor speed. Returns zeros
        below 0.1 m/s of in-plane speed. The original note cites "Modelling
        and Control of a Quadrotor UAV" (Pounds et al.).
        """
        vel_horiz = np.array([self.state.u, self.state.v, 0])
        vel_horiz_mag = np.linalg.norm(vel_horiz)

        if vel_horiz_mag < 0.1:
            return np.zeros(3)

        omega_avg = np.mean(self._motor_speeds_actual)
        h_force_mag = self.config.rotor_drag_coefficient * vel_horiz_mag * omega_avg
        h_force = -h_force_mag * vel_horiz / vel_horiz_mag

        return np.array([h_force[0], h_force[1], 0])

    def _calculate_wind_force(self) -> np.ndarray:
        """
        Force exerted by wind, expressed in the body frame.

        The effective wind is the mean wind plus white-noise gusts scaled by
        ``turbulence_intensity`` (a crude stand-in for a Dryden model; real
        turbulence is time-correlated). The force is quadratic drag on that
        wind speed, pushing along the wind direction.
        """
        # Nothing to do only when there is neither mean wind nor turbulence;
        # gusts must still act when the mean wind is zero.
        if self.turbulence_intensity <= 0 and np.linalg.norm(self.wind_velocity) < 0.01:
            return np.zeros(3)

        turb = np.random.randn(3) * self.turbulence_intensity * 2.0
        effective_wind = self.wind_velocity + turb

        wind_body = self._rotate_to_body(effective_wind)
        wind_mag = np.linalg.norm(wind_body)
        if wind_mag <= 0.01:
            return np.zeros(3)

        rho = self.get_air_density(self.state.z)
        force_mag = (0.5 * rho * wind_mag ** 2 *
                     self.config.drag_coefficient * self.config.cross_section_area)

        return force_mag * wind_body / wind_mag

    def _calculate_power_consumption(self, motor_commands: np.ndarray) -> float:
        """
        Estimate electrical power draw in watts.

        P = Σ(k1·T_i + k2·T_i²) + 5 W avionics overhead, where T_i is the
        current (clipped) thrust of motor i.

        Args:
            motor_commands: Unused; power is derived from the actual rotor
                speeds. Kept for interface compatibility.
        """
        thrusts = np.clip(self.config.thrust_coefficient * self._motor_speeds_actual ** 2,
                          0, self.config.max_thrust_per_motor)

        motor_power = np.sum(self.config.power_k1 * thrusts +
                             self.config.power_k2 * thrusts ** 2)

        return float(motor_power) + 5.0

    def get_noisy_observation(self) -> np.ndarray:
        """
        Return the 13-element state vector with Gaussian sensor noise added.

        Noise is applied to position, velocity, attitude and angular rate;
        the battery element is left exact. With ``enable_sensor_noise`` off
        the clean vector is returned.
        """
        obs = self.state.to_array()

        if not self.enable_sensor_noise:
            return obs

        obs[0:3] += np.random.randn(3) * self.config.position_noise_std
        obs[3:6] += np.random.randn(3) * self.config.velocity_noise_std
        obs[6:9] += np.random.randn(3) * 0.01  # attitude, ~0.5 deg
        obs[9:12] += np.random.randn(3) * self.config.gyro_noise_std

        return obs

    def _rotate_to_body(self, vec_world: np.ndarray) -> np.ndarray:
        """Rotate a vector from the world frame into the body frame."""
        return self._rotation_matrix().T @ vec_world

    def _rotate_to_world(self, vec_body: np.ndarray) -> np.ndarray:
        """Rotate a vector from the body frame into the world frame."""
        return self._rotation_matrix() @ vec_body

    def _rotation_matrix(self) -> np.ndarray:
        """Body-to-world rotation matrix for the current ZYX Euler angles."""
        c_phi = np.cos(self.state.phi)
        s_phi = np.sin(self.state.phi)
        c_theta = np.cos(self.state.theta)
        s_theta = np.sin(self.state.theta)
        c_psi = np.cos(self.state.psi)
        s_psi = np.sin(self.state.psi)

        return np.array([
            [c_psi * c_theta, c_psi * s_theta * s_phi - s_psi * c_phi,
             c_psi * s_theta * c_phi + s_psi * s_phi],
            [s_psi * c_theta, s_psi * s_theta * s_phi + c_psi * c_phi,
             s_psi * s_theta * c_phi - c_psi * s_phi],
            [-s_theta, c_theta * s_phi, c_theta * c_phi]
        ])

    @staticmethod
    def _wrap_angle(angle: float) -> float:
        """
        Wrap an angle to [-pi, pi].

        Angles already inside the interval are returned unchanged (bit-exact).
        Non-finite input (inf / NaN) is returned as-is instead of looping
        forever, and large angles are wrapped in constant time.
        """
        if not np.isfinite(angle):
            return angle
        if -np.pi <= angle <= np.pi:
            return angle
        return (angle + np.pi) % (2 * np.pi) - np.pi
|
| 634 |
+
|
| 635 |
+
|
| 636 |
+
class QuadcopterEnv(gym.Env):
|
| 637 |
+
"""
|
| 638 |
+
Gymnasium environment for single quadcopter control.
|
| 639 |
+
|
| 640 |
+
Observation (13 dims):
|
| 641 |
+
- Position: x, y, z
|
| 642 |
+
- Velocity: u, v, w (body frame)
|
| 643 |
+
- Orientation: phi, theta, psi
|
| 644 |
+
- Angular rates: p, q, r
|
| 645 |
+
- Battery remaining
|
| 646 |
+
|
| 647 |
+
Action (4 dims):
|
| 648 |
+
- Motor commands: [m1, m2, m3, m4] in [0, 1]
|
| 649 |
+
|
| 650 |
+
Reward:
|
| 651 |
+
- Configurable based on task (hover, waypoint, etc.)
|
| 652 |
+
"""
|
| 653 |
+
|
| 654 |
+
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}
|
| 655 |
+
|
| 656 |
+
def __init__(self,
|
| 657 |
+
render_mode: Optional[str] = None,
|
| 658 |
+
config: Optional[QuadcopterConfig] = None,
|
| 659 |
+
task: str = "hover",
|
| 660 |
+
max_steps: int = 1000):
|
| 661 |
+
"""
|
| 662 |
+
Args:
|
| 663 |
+
render_mode: "human" for visualization, None for headless
|
| 664 |
+
config: Quadcopter configuration
|
| 665 |
+
task: "hover", "waypoint", "tracking"
|
| 666 |
+
max_steps: Episode length
|
| 667 |
+
"""
|
| 668 |
+
super().__init__()
|
| 669 |
+
|
| 670 |
+
self.render_mode = render_mode
|
| 671 |
+
self.task = task
|
| 672 |
+
self.max_steps = max_steps
|
| 673 |
+
|
| 674 |
+
# Physics engine
|
| 675 |
+
self.fdm = QuadcopterFDM(config=config)
|
| 676 |
+
|
| 677 |
+
# Observation space: 13 continuous values
|
| 678 |
+
self.observation_space = spaces.Box(
|
| 679 |
+
low=np.array([-100, -100, 0, -20, -20, -20,
|
| 680 |
+
-np.pi, -np.pi/2, -np.pi, -5, -5, -5, 0]),
|
| 681 |
+
high=np.array([100, 100, 100, 20, 20, 20,
|
| 682 |
+
np.pi, np.pi/2, np.pi, 5, 5, 5, 1]),
|
| 683 |
+
dtype=np.float32
|
| 684 |
+
)
|
| 685 |
+
|
| 686 |
+
# Action space: 4 motor throttles
|
| 687 |
+
self.action_space = spaces.Box(
|
| 688 |
+
low=np.zeros(4),
|
| 689 |
+
high=np.ones(4),
|
| 690 |
+
dtype=np.float32
|
| 691 |
+
)
|
| 692 |
+
|
| 693 |
+
# Task parameters
|
| 694 |
+
self.target_position = np.array([0, 0, 2.0]) # Default hover at 2m
|
| 695 |
+
self.target_velocity = np.zeros(3)
|
| 696 |
+
|
| 697 |
+
# Episode tracking
|
| 698 |
+
self.step_count = 0
|
| 699 |
+
self.total_reward = 0.0
|
| 700 |
+
|
| 701 |
+
# Rendering
|
| 702 |
+
self.viewer = None
|
| 703 |
+
|
| 704 |
+
logger.debug(f"QuadcopterEnv created (task={task})")
|
| 705 |
+
|
| 706 |
+
def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None):
|
| 707 |
+
"""Reset environment to initial state."""
|
| 708 |
+
super().reset(seed=seed)
|
| 709 |
+
|
| 710 |
+
# Random initial position (slight variation)
|
| 711 |
+
init_pos = np.array([
|
| 712 |
+
self.np_random.uniform(-0.5, 0.5),
|
| 713 |
+
self.np_random.uniform(-0.5, 0.5),
|
| 714 |
+
self.np_random.uniform(0.1, 0.5)
|
| 715 |
+
])
|
| 716 |
+
|
| 717 |
+
self.fdm.reset(position=init_pos)
|
| 718 |
+
|
| 719 |
+
# Randomize wind (optional)
|
| 720 |
+
if options and options.get('random_wind', False):
|
| 721 |
+
wind = self.np_random.uniform(-3, 3, size=3)
|
| 722 |
+
wind[2] = 0 # No vertical wind
|
| 723 |
+
self.fdm.set_wind(wind, turbulence=0.2)
|
| 724 |
+
|
| 725 |
+
self.step_count = 0
|
| 726 |
+
self.total_reward = 0.0
|
| 727 |
+
|
| 728 |
+
return self._get_obs(), {}
|
| 729 |
+
|
| 730 |
+
def step(self, action: np.ndarray):
|
| 731 |
+
"""Execute one environment step."""
|
| 732 |
+
# Advance physics
|
| 733 |
+
self.fdm.step(action, dt=0.01)
|
| 734 |
+
self.step_count += 1
|
| 735 |
+
|
| 736 |
+
# Get observation
|
| 737 |
+
obs = self._get_obs()
|
| 738 |
+
|
| 739 |
+
# Calculate reward
|
| 740 |
+
reward = self._calculate_reward()
|
| 741 |
+
self.total_reward += reward
|
| 742 |
+
|
| 743 |
+
# Check termination
|
| 744 |
+
terminated = self._check_terminated()
|
| 745 |
+
truncated = self.step_count >= self.max_steps
|
| 746 |
+
|
| 747 |
+
info = {
|
| 748 |
+
'position': np.array([self.fdm.state.x, self.fdm.state.y, self.fdm.state.z]),
|
| 749 |
+
'battery': self.fdm.state.battery_remaining,
|
| 750 |
+
'step': self.step_count
|
| 751 |
+
}
|
| 752 |
+
|
| 753 |
+
return obs, reward, terminated, truncated, info
|
| 754 |
+
|
| 755 |
+
def _get_obs(self) -> np.ndarray:
|
| 756 |
+
"""Get current observation."""
|
| 757 |
+
return self.fdm.state.to_array()
|
| 758 |
+
|
| 759 |
+
def _calculate_reward(self) -> float:
|
| 760 |
+
"""Calculate reward based on task."""
|
| 761 |
+
pos = np.array([self.fdm.state.x, self.fdm.state.y, self.fdm.state.z])
|
| 762 |
+
vel = np.array([self.fdm.state.u, self.fdm.state.v, self.fdm.state.w])
|
| 763 |
+
|
| 764 |
+
if self.task == "hover":
|
| 765 |
+
# Reward for staying at target position
|
| 766 |
+
pos_error = np.linalg.norm(pos - self.target_position)
|
| 767 |
+
vel_error = np.linalg.norm(vel)
|
| 768 |
+
|
| 769 |
+
# Exponential reward shaping
|
| 770 |
+
reward = np.exp(-pos_error) + 0.1 * np.exp(-vel_error)
|
| 771 |
+
|
| 772 |
+
# Penalty for attitude deviation
|
| 773 |
+
attitude_error = abs(self.fdm.state.phi) + abs(self.fdm.state.theta)
|
| 774 |
+
reward -= 0.1 * attitude_error
|
| 775 |
+
|
| 776 |
+
elif self.task == "waypoint":
|
| 777 |
+
# Reward for reaching waypoint
|
| 778 |
+
dist = np.linalg.norm(pos - self.target_position)
|
| 779 |
+
reward = -dist # Negative distance as reward
|
| 780 |
+
|
| 781 |
+
if dist < 0.5: # Waypoint reached
|
| 782 |
+
reward += 10.0
|
| 783 |
+
|
| 784 |
+
else: # tracking
|
| 785 |
+
# Reward for following target velocity
|
| 786 |
+
vel_error = np.linalg.norm(vel - self.target_velocity)
|
| 787 |
+
reward = -vel_error
|
| 788 |
+
|
| 789 |
+
return float(reward)
|
| 790 |
+
|
| 791 |
+
def _check_terminated(self) -> bool:
|
| 792 |
+
"""Check if episode should terminate."""
|
| 793 |
+
# Ground crash
|
| 794 |
+
if self.fdm.state.z < 0.05:
|
| 795 |
+
return True
|
| 796 |
+
|
| 797 |
+
# Out of bounds
|
| 798 |
+
if abs(self.fdm.state.x) > 50 or abs(self.fdm.state.y) > 50:
|
| 799 |
+
return True
|
| 800 |
+
|
| 801 |
+
# Too high
|
| 802 |
+
if self.fdm.state.z > 50:
|
| 803 |
+
return True
|
| 804 |
+
|
| 805 |
+
# Battery dead
|
| 806 |
+
if self.fdm.state.battery_remaining <= 0:
|
| 807 |
+
return True
|
| 808 |
+
|
| 809 |
+
# Extreme attitude (flipped)
|
| 810 |
+
if abs(self.fdm.state.phi) > np.pi/2 or abs(self.fdm.state.theta) > np.pi/2:
|
| 811 |
+
return True
|
| 812 |
+
|
| 813 |
+
return False
|
| 814 |
+
|
| 815 |
+
def render(self):
|
| 816 |
+
"""Render the environment."""
|
| 817 |
+
if self.render_mode is None:
|
| 818 |
+
return None
|
| 819 |
+
|
| 820 |
+
if self.render_mode == "human":
|
| 821 |
+
self._render_human()
|
| 822 |
+
elif self.render_mode == "rgb_array":
|
| 823 |
+
return self._render_rgb()
|
| 824 |
+
|
| 825 |
+
def _render_human(self):
|
| 826 |
+
"""Simple text-based rendering for now."""
|
| 827 |
+
pos = [self.fdm.state.x, self.fdm.state.y, self.fdm.state.z]
|
| 828 |
+
att = [np.degrees(self.fdm.state.phi),
|
| 829 |
+
np.degrees(self.fdm.state.theta),
|
| 830 |
+
np.degrees(self.fdm.state.psi)]
|
| 831 |
+
|
| 832 |
+
print(f"\rPos: [{pos[0]:6.2f}, {pos[1]:6.2f}, {pos[2]:6.2f}] "
|
| 833 |
+
f"Att: [{att[0]:5.1f}°, {att[1]:5.1f}°, {att[2]:5.1f}°] "
|
| 834 |
+
f"Bat: {self.fdm.state.battery_remaining*100:4.1f}%", end='')
|
| 835 |
+
|
| 836 |
+
def _render_rgb(self) -> np.ndarray:
    """Return a placeholder RGB frame.

    No real 3D rendering is implemented yet, so this returns an all-zero
    ``uint8`` array of shape (480, 640, 3).
    """
    height, width, channels = 480, 640, 3
    return np.zeros(shape=(height, width, channels), dtype=np.uint8)
|
| 841 |
+
|
| 842 |
+
def close(self):
    """Release rendering resources by dropping the viewer reference, if any."""
    if not self.viewer:
        # Nothing truthy is held; leave the attribute exactly as it is.
        return
    self.viewer = None
|
| 846 |
+
|
| 847 |
+
def set_target(self, position: Optional[np.ndarray] = None,
               velocity: Optional[np.ndarray] = None):
    """Update the task target.

    Args:
        position: New target position; stored as ``self.target_position``
            (copied via ``np.array``). Left unchanged when None.
        velocity: New target velocity; stored as ``self.target_velocity``
            (copied via ``np.array``). Left unchanged when None.
    """
    updates = (
        ("target_position", position),
        ("target_velocity", velocity),
    )
    for attribute, value in updates:
        if value is not None:
            setattr(self, attribute, np.array(value))
|
| 854 |
+
|
| 855 |
+
|
| 856 |
+
class MultiQuadcopterEnv(gym.Env):
    """
    Multi-agent quadcopter environment for swarm battles.

    Each agent controls one quadcopter.
    Supports cooperative and competitive scenarios.

    Drones are split into two teams by index: the first ``num_drones // 2``
    are ``'blue'`` and the rest are ``'red'``. Observations and actions are
    stacked per drone, i.e. row ``i`` of each array belongs to drone ``i``.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    # Health at or below this value is treated as exactly zero. Repeatedly
    # subtracting 0.2 from 1.0 leaves a residue of about 5.6e-17 instead of
    # 0.0, which would otherwise keep a fully drained drone "alive".
    _HEALTH_EPSILON = 1e-9

    # Number of leading observation entries filled from state.to_array().
    _OWN_STATE_DIM = 13

    def __init__(self,
                 num_drones: int = 4,
                 render_mode: Optional[str] = None,
                 config: Optional[QuadcopterConfig] = None,
                 arena_size: float = 20.0,
                 battle_mode: bool = True):
        """
        Args:
            num_drones: Number of quadcopters
            render_mode: Visualization mode
            config: Shared drone configuration
            arena_size: Size of arena (meters)
            battle_mode: If True, drones can tag each other
        """
        super().__init__()

        self.num_drones = num_drones
        self.render_mode = render_mode
        self.arena_size = arena_size
        self.battle_mode = battle_mode

        # Create drones
        self.drones = [QuadcopterFDM(config=config) for _ in range(num_drones)]

        # Teams (first half blue, second half red)
        self.teams = ['blue' if i < num_drones // 2 else 'red'
                      for i in range(num_drones)]

        # Combat state
        self.health = np.ones(num_drones)
        self.tag_cooldowns = np.zeros(num_drones)
        self.tags_scored = np.zeros(num_drones, dtype=int)

        # Tag parameters
        self.tag_range = 2.0  # meters
        self.tag_cooldown_time = 3.0  # seconds
        self.tag_damage = 0.2

        # Observation: own state (13) + relative positions of others (3 * (n-1))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(num_drones, self._obs_dim()), dtype=np.float32
        )

        # Action: each drone has 4 motor commands
        self.action_space = spaces.Box(
            low=0, high=1, shape=(num_drones, 4), dtype=np.float32
        )

        self.step_count = 0
        self.max_steps = 1000

        logger.debug(f"MultiQuadcopterEnv created ({num_drones} drones)")

    def _obs_dim(self) -> int:
        """Return the per-drone observation length: 13 + 3 * (num_drones - 1)."""
        return self._OWN_STATE_DIM + 3 * (self.num_drones - 1)

    def _positions(self) -> np.ndarray:
        """Return an array with one (x, y, z) row per drone, in drone order."""
        return np.array([[d.state.x, d.state.y, d.state.z] for d in self.drones])

    def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None):
        """Reset all drones and the combat state.

        Drones are placed evenly on a circle of radius ``arena_size / 4`` at
        a height of 2.0 plus a uniform random offset in [-0.5, 0.5) drawn
        from ``self.np_random``. ``options`` is accepted but not used.

        Returns:
            Tuple of (stacked observations, empty info dict).
        """
        super().reset(seed=seed)

        # Spawn drones in formation
        radius = self.arena_size / 4
        for i, drone in enumerate(self.drones):
            angle = 2 * np.pi * i / self.num_drones
            x = radius * np.cos(angle)
            y = radius * np.sin(angle)
            z = 2.0 + self.np_random.uniform(-0.5, 0.5)

            drone.reset(position=np.array([x, y, z]))

        # Reset combat state
        self.health = np.ones(self.num_drones)
        self.tag_cooldowns = np.zeros(self.num_drones)
        self.tags_scored = np.zeros(self.num_drones, dtype=int)

        self.step_count = 0

        return self._get_all_obs(), {}

    def step(self, actions: np.ndarray):
        """Advance every living drone by one 0.01 s physics step.

        Args:
            actions: Per-drone commands; ``actions[i]`` is passed to drone
                ``i``'s ``step``. Drones whose health is not positive are
                not stepped.

        Returns:
            Tuple ``(obs, rewards, terminated, truncated, info)`` where
            ``rewards`` holds one value per drone and ``info`` carries copies
            of the health and tag arrays plus the team list.
        """
        dt = 0.01

        # Update physics for each drone
        for i, drone in enumerate(self.drones):
            if self.health[i] > 0:
                drone.step(actions[i], dt=dt)

        # Process combat (if enabled)
        if self.battle_mode:
            self._process_combat(dt)

        self.step_count += 1

        # Get observations and rewards
        obs = self._get_all_obs()
        rewards = self._calculate_rewards()

        # Check termination
        terminated = self._check_terminated()
        truncated = self.step_count >= self.max_steps

        info = {
            'health': self.health.copy(),
            'tags': self.tags_scored.copy(),
            'teams': self.teams
        }

        return obs, rewards, terminated, truncated, info

    def _get_all_obs(self) -> np.ndarray:
        """Get observations for all drones.

        Row ``i`` holds drone ``i``'s own state (first 13 entries, from
        ``state.to_array()``) followed by the relative positions of every
        other drone, in drone order.
        """
        own = self._OWN_STATE_DIM
        obs = np.zeros((self.num_drones, self._obs_dim()), dtype=np.float32)

        # Get positions for relative calculations
        positions = self._positions()

        for i, drone in enumerate(self.drones):
            # Own state
            obs[i, :own] = drone.state.to_array()

            # Relative positions of other drones (row i removed, order kept)
            relative = np.delete(positions, i, axis=0) - positions[i]
            obs[i, own:] = relative.ravel()

        return obs

    def _process_combat(self, dt: float):
        """Process drone combat (tagging).

        Cooldowns are reduced by ``dt``. Each living drone that is off
        cooldown then tags at most one target: the nearest living enemy
        strictly within ``tag_range``. A tag removes ``tag_damage`` health
        from the target, starts the tagger's cooldown and increments its
        tag count.
        """
        # Update cooldowns
        self.tag_cooldowns = np.maximum(0, self.tag_cooldowns - dt)

        # Get positions
        positions = self._positions()

        # Check for tags
        for i in range(self.num_drones):
            if self.health[i] <= 0 or self.tag_cooldowns[i] > 0:
                continue

            # Find the closest eligible enemy inside tag range.
            target = None
            target_dist = self.tag_range
            for j in range(self.num_drones):
                if i == j or self.teams[i] == self.teams[j]:
                    continue

                if self.health[j] <= 0:
                    continue

                dist = np.linalg.norm(positions[i] - positions[j])
                if dist < target_dist:
                    target, target_dist = j, dist

            if target is None:
                continue

            # Tag successful! Only one tag per cooldown period: the cooldown
            # set here must prevent further tags by this drone in this step.
            remaining = self.health[target] - self.tag_damage
            # Snap float residue (and any overshoot) to exactly zero so the
            # "health > 0" alive checks behave as intended.
            self.health[target] = (
                remaining if remaining > self._HEALTH_EPSILON else 0.0
            )
            self.tag_cooldowns[i] = self.tag_cooldown_time
            self.tags_scored[i] += 1

            logger.debug(f"Drone {i} tagged drone {target}! Health: {self.health[target]:.2f}")

    def _calculate_rewards(self) -> np.ndarray:
        """Calculate rewards for all drones.

        Per drone: 0.01 for being alive, plus 1.0 per tag in ``tags_scored``,
        minus 5.0 if its health is not positive.
        """
        alive = self.health > 0

        # Survival reward
        rewards = np.where(alive, 0.01, 0.0)

        # Tag reward
        # NOTE(review): tags_scored is cumulative over the episode, so this
        # bonus (and the death penalty below) is paid again on every step.
        # Confirm whether a per-step delta was intended before changing it.
        rewards += 1.0 * self.tags_scored

        # Death penalty
        rewards[~alive] -= 5.0

        return rewards

    def _check_terminated(self) -> bool:
        """Check if battle should end (one team has no living drones)."""
        blue_alive = any(h > 0 and team == 'blue'
                         for h, team in zip(self.health, self.teams))
        red_alive = any(h > 0 and team == 'red'
                        for h, team in zip(self.health, self.teams))

        # One team eliminated
        return not (blue_alive and red_alive)

    def render(self):
        """Render multi-drone environment.

        Only ``"human"`` mode produces output (a text table on stdout); any
        other mode, including ``"rgb_array"``, currently renders nothing.
        """
        if self.render_mode == "human":
            print("\n" + "="*60)
            for i, drone in enumerate(self.drones):
                team = self.teams[i]
                status = "ALIVE" if self.health[i] > 0 else "DEAD"
                pos = [drone.state.x, drone.state.y, drone.state.z]
                print(f"Drone {i} [{team:4s}] {status:5s} "
                      f"Pos: [{pos[0]:6.2f}, {pos[1]:6.2f}, {pos[2]:6.2f}] "
                      f"HP: {self.health[i]*100:4.0f}% Tags: {self.tags_scored[i]}")

    def close(self):
        """Clean up (nothing to release)."""
        pass
|
| 1073 |
+
|
| 1074 |
+
|
| 1075 |
+
# Register environments with Gymnasium
|
| 1076 |
+
def register_quadcopter_envs():
    """Register the custom quadcopter environments with Gymnasium.

    Registers the hover, waypoint and battle environments, each with a
    1000-step episode limit. Registration is best-effort: any exception is
    logged at debug level and suppressed, and registration stops at the
    first failure.
    """
    # (environment id, entry point, constructor kwargs)
    env_specs = (
        ('Quadcopter-Hover-v1',
         'reality_simulator.arena.jsbsim_quadcopter:QuadcopterEnv',
         {'task': 'hover'}),
        ('Quadcopter-Waypoint-v1',
         'reality_simulator.arena.jsbsim_quadcopter:QuadcopterEnv',
         {'task': 'waypoint'}),
        ('Quadcopter-Battle-v1',
         'reality_simulator.arena.jsbsim_quadcopter:MultiQuadcopterEnv',
         {'num_drones': 4, 'battle_mode': True}),
    )

    try:
        for env_id, entry_point, env_kwargs in env_specs:
            gym.register(
                id=env_id,
                entry_point=entry_point,
                kwargs=env_kwargs,
                max_episode_steps=1000,
            )
        logger.info("✅ Quadcopter environments registered")
    except Exception as e:
        logger.debug(f"Env registration skipped: {e}")


# Auto-register on import
register_quadcopter_envs()
|
| 1104 |
+
|
| 1105 |
+
|
| 1106 |
+
if __name__ == "__main__":
    # Quick smoke test: exercise the flight dynamics model directly, then
    # run a short episode through the Gymnasium wrapper.
    print("🚁 Testing QuadcopterFDM...")

    fdm = QuadcopterFDM()
    fdm.reset(position=np.array([0, 0, 2.0]))
    fdm.set_wind(np.array([2.0, 0, 0]), turbulence=0.3)

    print(f"Initial position: [{fdm.state.x:.2f}, {fdm.state.y:.2f}, {fdm.state.z:.2f}]")

    # Hover test (equal thrust on all motors)
    hover_thrust = 0.58  # Approximate hover throttle

    # 100 steps; the "1s" label below presumes step()'s default dt is 0.01 s.
    for _ in range(100):
        # Fresh array each step in case step() modifies its input.
        fdm.step(np.full(4, hover_thrust))

    print(f"After 1s hover: [{fdm.state.x:.2f}, {fdm.state.y:.2f}, {fdm.state.z:.2f}]")
    print(f"Battery: {fdm.state.battery_remaining*100:.1f}%")

    print("\n✅ QuadcopterFDM working!")

    # Test Gymnasium env
    print("\n🎮 Testing QuadcopterEnv...")
    env = QuadcopterEnv(render_mode="human", task="hover")
    try:
        obs, _ = env.reset()

        for _ in range(50):
            # Sample only to get an array of the right shape and dtype,
            # then overwrite every motor command with the hover throttle.
            action = env.action_space.sample()
            action[:] = hover_thrust  # Try to hover
            obs, reward, term, trunc, info = env.step(action)
            env.render()
            if term or trunc:
                break

        print(f"\n\nTotal reward: {env.total_reward:.2f}")
        print("✅ QuadcopterEnv working!")
    finally:
        # Always release the environment, even if a step or render fails.
        env.close()
|
UNPACK/metadata.json
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"generated": null,
|
| 3 |
+
"mode": "ENSEMBLE",
|
| 4 |
+
"ensemble_size": 107,
|
| 5 |
+
"organism_names": [
|
| 6 |
+
"edbc366172639024",
|
| 7 |
+
"86d78ecb17378ff1",
|
| 8 |
+
"cd2e3d9e8344e077",
|
| 9 |
+
"f585fb9f20bb0729",
|
| 10 |
+
"951c9f843b0d9243",
|
| 11 |
+
"fd5dbc8866ea1bde",
|
| 12 |
+
"43ddb19a041390c6",
|
| 13 |
+
"58f7850cc2ed618d",
|
| 14 |
+
"c79f68de668b36e3",
|
| 15 |
+
"81323964002dba96",
|
| 16 |
+
"b168fd01c96dd355",
|
| 17 |
+
"43d8288b2748e1bf",
|
| 18 |
+
"9e6e0b030a372015",
|
| 19 |
+
"9dc419a36357d7a7",
|
| 20 |
+
"c1f6f11bfbc53479",
|
| 21 |
+
"5a584dd72a843b1b",
|
| 22 |
+
"449d555f97089ff4",
|
| 23 |
+
"fbeb2853dc105919",
|
| 24 |
+
"30c6b10eadcdc3e9",
|
| 25 |
+
"7798509f4e099717",
|
| 26 |
+
"9674ac0a0b07650a",
|
| 27 |
+
"fab689bcb08d3e58",
|
| 28 |
+
"93c892a86a589860",
|
| 29 |
+
"d70097c35b0242c8",
|
| 30 |
+
"2e0397589f23af91",
|
| 31 |
+
"858f84cc6270de47",
|
| 32 |
+
"df6a436351b53474",
|
| 33 |
+
"646348e1be52244f",
|
| 34 |
+
"589802d5746181db",
|
| 35 |
+
"c11c5b0df4de0a37",
|
| 36 |
+
"04649226ae9efebb",
|
| 37 |
+
"e8173306bdfd4c13",
|
| 38 |
+
"78870f7003517a3a",
|
| 39 |
+
"6d89bac8dbcfd59c",
|
| 40 |
+
"f4bddc2f5be6686e",
|
| 41 |
+
"33a5293e4c3ac3cf",
|
| 42 |
+
"31d897dc0cafa21a",
|
| 43 |
+
"3414fcd46bc6c66d",
|
| 44 |
+
"c5109ee5294e4a7e",
|
| 45 |
+
"e547dad6892d4c45",
|
| 46 |
+
"2a0a04b7921a1671",
|
| 47 |
+
"92a453e86e1e0e0e",
|
| 48 |
+
"2df24a997db6d851",
|
| 49 |
+
"1345cbbcf514c715",
|
| 50 |
+
"62a276d820a94e68",
|
| 51 |
+
"417bfd09dbf06bf4",
|
| 52 |
+
"c55fa8f9abd047f1",
|
| 53 |
+
"821db11ec8e1952a",
|
| 54 |
+
"2a86a4de18d7a088",
|
| 55 |
+
"a4b6929eb93343bf",
|
| 56 |
+
"56e76c222a39c0e3",
|
| 57 |
+
"98aa5e6a4b474acc",
|
| 58 |
+
"b5c7ef0643d91c56",
|
| 59 |
+
"819596e8f6ee7600",
|
| 60 |
+
"8cda83a3997f0c31",
|
| 61 |
+
"55256341f7b9af24",
|
| 62 |
+
"1438f196417bdb0b",
|
| 63 |
+
"277a3319b1c4cf53",
|
| 64 |
+
"567cf59af9f137b4",
|
| 65 |
+
"4cfaddc9dce4a5f7",
|
| 66 |
+
"b9d3440251c48761",
|
| 67 |
+
"2e2121ad1c57593f",
|
| 68 |
+
"24e7cd88b78393da",
|
| 69 |
+
"a2f1a9edae3711f6",
|
| 70 |
+
"0b58d859da8c0b02",
|
| 71 |
+
"f42be2fb7c734fe8",
|
| 72 |
+
"9e44f76626a0bd6d",
|
| 73 |
+
"745d97256adcdbde",
|
| 74 |
+
"d9d7efccd4f56acb",
|
| 75 |
+
"b7d80845618bc5ae",
|
| 76 |
+
"c988215ab0ae0567",
|
| 77 |
+
"68849731ee30a5db",
|
| 78 |
+
"5e971e526a546789",
|
| 79 |
+
"b340af532366cc7c",
|
| 80 |
+
"59a4a010bd57af65",
|
| 81 |
+
"ca01f4181bf90a0d",
|
| 82 |
+
"c0a3093a306aa9f6",
|
| 83 |
+
"f6fa3568de13430c",
|
| 84 |
+
"f558482357ee27fc",
|
| 85 |
+
"f0b599001944f186",
|
| 86 |
+
"9c71e95851243c24",
|
| 87 |
+
"6e924f6134d2fe59",
|
| 88 |
+
"8c09eb8977720979",
|
| 89 |
+
"1fa598a907e91802",
|
| 90 |
+
"08fdaf4d05ac65a8",
|
| 91 |
+
"731939b8691bdfc0",
|
| 92 |
+
"ffdb2164fe3eefb0",
|
| 93 |
+
"615fe8569ce56dba",
|
| 94 |
+
"787ea58fca362124",
|
| 95 |
+
"6e8090766e191505",
|
| 96 |
+
"221ec40b2bed240d",
|
| 97 |
+
"c38a656005161d6d",
|
| 98 |
+
"4bf524bf5dd7ca28",
|
| 99 |
+
"b40ff22aa6b46340",
|
| 100 |
+
"a8ed3e3b9df0d23b",
|
| 101 |
+
"f57ad03fba4f1062",
|
| 102 |
+
"1141890b4a500eb1",
|
| 103 |
+
"90c2b87c11e71a49",
|
| 104 |
+
"4ce5894e48795ae6",
|
| 105 |
+
"0a7244228613e835",
|
| 106 |
+
"392c4f9ffcb97860",
|
| 107 |
+
"5ee9a85dbd894e10",
|
| 108 |
+
"8ffa19fbf9e1caec",
|
| 109 |
+
"96195a384b90b4ca",
|
| 110 |
+
"73a3c676059a4d06",
|
| 111 |
+
"300e99a67053e897",
|
| 112 |
+
"47cd3c24adc3b8c2"
|
| 113 |
+
],
|
| 114 |
+
"training_config": {
|
| 115 |
+
"learning_rate": 0.001,
|
| 116 |
+
"batch_size": 32,
|
| 117 |
+
"gamma": 0.99,
|
| 118 |
+
"epsilon": 0.1,
|
| 119 |
+
"epsilon_decay": 0.995,
|
| 120 |
+
"epsilon_min": 0.01,
|
| 121 |
+
"rl_loss_weight": 0.8,
|
| 122 |
+
"language_loss_weight": 0.1,
|
| 123 |
+
"concept_loss_weight": 0.1,
|
| 124 |
+
"buffer_size": 10000
|
| 125 |
+
},
|
| 126 |
+
"data_compressed": true,
|
| 127 |
+
"includes_readme": true,
|
| 128 |
+
"includes_tmrl_adapter": true,
|
| 129 |
+
"language_curriculum": {
|
| 130 |
+
"files": [
|
| 131 |
+
"curriculum/connector_words.json",
|
| 132 |
+
"curriculum/dialogue_frames.json",
|
| 133 |
+
"curriculum/game_language_tasks.json",
|
| 134 |
+
"curriculum/reward_rubric.json",
|
| 135 |
+
"curriculum/role_transform_tasks.json",
|
| 136 |
+
"training_logs/schema.json"
|
| 137 |
+
],
|
| 138 |
+
"training_log_schema": "training_logs/schema.json"
|
| 139 |
+
},
|
| 140 |
+
"unpack_outputs": {
|
| 141 |
+
"onnx": "ensemble.onnx",
|
| 142 |
+
"weights": "ensemble_weights.pt"
|
| 143 |
+
}
|
| 144 |
+
}
|
UNPACK/requirements.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Cocoon Ultimate Package Dependencies
|
| 2 |
+
# Install with: pip install -r requirements.txt
|
| 3 |
+
|
| 4 |
+
# Core
|
| 5 |
+
numpy>=1.21.0
|
| 6 |
+
|
| 7 |
+
# Neural network weights + ONNX export
|
| 8 |
+
torch>=2.0.0
|
| 9 |
+
onnx>=1.14.0
|
| 10 |
+
onnxruntime>=1.15.0 # Runtime (CPU)
|
| 11 |
+
# onnxruntime-gpu>=1.15.0 # Uncomment for NVIDIA GPU
|
| 12 |
+
|
| 13 |
+
# P2P Networking (for CocoonLink battles)
|
| 14 |
+
websockets>=11.0
|
| 15 |
+
|
| 16 |
+
# Drone Warfare Arena
|
| 17 |
+
matplotlib>=3.8.0 # Trajectory visualization
|
| 18 |
+
# PyFlyt>=1.0.0 # Optional: 3D drone visualization (pip install PyFlyt)
|
| 19 |
+
|
| 20 |
+
# Gymnasium Environments (Proton Game Arena)
|
| 21 |
+
gymnasium>=0.29.0 # Core RL environments
|
| 22 |
+
pygame>=2.5.0 # Visual rendering
|
UNPACK/training_logs/schema.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "1.0",
|
| 3 |
+
"format": "jsonl",
|
| 4 |
+
"default_path": "training_logs/live_learning_trace.jsonl",
|
| 5 |
+
"purpose": "Post-export language/RL learning trace for Council coaches and clone-from-live-state.",
|
| 6 |
+
"required_fields": [
|
| 7 |
+
"timestamp",
|
| 8 |
+
"event_type",
|
| 9 |
+
"stage",
|
| 10 |
+
"input",
|
| 11 |
+
"target",
|
| 12 |
+
"output",
|
| 13 |
+
"reward",
|
| 14 |
+
"score",
|
| 15 |
+
"vocab_size",
|
| 16 |
+
"training_step"
|
| 17 |
+
],
|
| 18 |
+
"event_types": [
|
| 19 |
+
"connector_seed",
|
| 20 |
+
"echo_trial",
|
| 21 |
+
"role_transform_trial",
|
| 22 |
+
"turn_exchange_trial",
|
| 23 |
+
"game_language_binding",
|
| 24 |
+
"clone_dialogue_arena_turn",
|
| 25 |
+
"rl_transition",
|
| 26 |
+
"runtime_save"
|
| 27 |
+
],
|
| 28 |
+
"coach_contract": {
|
| 29 |
+
"speaker": false,
|
| 30 |
+
"judge": true,
|
| 31 |
+
"note": "The outside coach scores Cocoon outputs; it should not dump raw prompt scaffolds into training text."
|
| 32 |
+
}
|
| 33 |
+
}
|
UNPACK/vocabulary.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
UNPACK/work!.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fdbf485bf258fc2bb061dc57aea75da8a5053180829a41837c5f2eb4b8a607b
|
| 3 |
+
size 364385535
|