chrisjcc committed on
Commit
e3c8f7e
·
verified ·
1 Parent(s): 99feccd

Upload final trained model

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -34
  2. README.md +150 -38
  3. final_model.zip +3 -0
.gitattributes CHANGED
@@ -1,35 +1,2 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.zip filter=lfs diff=lfs merge=lfs -text
2
+ *.onnx filter=lfs diff=lfs merge=lfs -text
 
README.md CHANGED
@@ -1,48 +1,160 @@
1
  ---
2
- language: en
3
- license: mit
4
- library_name: stable-baselines3
5
  tags:
6
- - reinforcement-learning
7
- - stable-baselines3
8
- - gymnasium
9
- - maskable-ppo
10
- datasets:
11
- - custom-utdg-env
12
- metrics:
13
- - episode_reward
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ---
15
 
16
- # UTDG Maskable PPO Policy
17
 
18
- This model is trained on the UTDG (Untitled Tower Defense Game) environment using Stable-Baselines3 MaskablePPO.
19
 
20
- ## Model Details
21
-
22
- - **Algorithm**: MaskablePPO (Proximal Policy Optimization with invalid action masking)
23
- - **Framework**: Stable-Baselines3
24
- - **Environment**: Custom UTDG Gymnasium environment
25
- - **Task**: Tower defense game AI agent
26
 
27
  ## Usage
 
28
  ```python
29
- from huggingface_hub import hf_hub_download
30
  from sb3_contrib import MaskablePPO
31
-
32
- # Download the model
33
- model_path = hf_hub_download(
34
- repo_id="chrisjcc/utdg-maskableppo-policy",
35
- filename="maskableppo_utdg_policy.zip"
36
- )
37
-
38
- # Load the model
39
- model = MaskablePPO.load(model_path)
40
-
41
- # Use for inference
42
- # obs, info = env.reset()
43
- # action, _states = model.predict(obs, action_masks=info["action_mask"])
44
- ```
45
-
46
- ## Training
47
-
48
- The model was trained using reinforcement learning on the UTDG environment.
 
1
  ---
 
 
 
2
  tags:
3
+ - reinforcement-learning
4
+ - stable-baselines3
5
+ - maskable-ppo
6
+ - utdg
7
+ - tower-defense
8
+ metadata:
9
+ utc_timestamp: 2025-11-28T13:41:53.659311
10
+ env_name: UTDGEnv-v0
11
+ model_file: final_model.zip
12
+ total_timesteps: 0
13
+ task: reinforcement-learning
14
+ algorithm: MaskablePPO
15
+ game: Untitled Tower Defense Game
16
+ hydra_config: |
17
+ {
18
+ "runtime": {
19
+ "mode": "web",
20
+ "transport": {
21
+ "type": "websocket",
22
+ "role": "server",
23
+ "url": null,
24
+ "timeout": 60.0,
25
+ "reconnect_attempts": 3
26
+ },
27
+ "server": {
28
+ "enabled": true,
29
+ "host": "0.0.0.0",
30
+ "port": 8000,
31
+ "websocket_routes": {
32
+ "ui": "/ws",
33
+ "godot": "/godot"
34
+ }
35
+ },
36
+ "launcher": {
37
+ "enabled": true,
38
+ "http_port": 8080,
39
+ "headless": false,
40
+ "build_dir": "builds/web"
41
+ },
42
+ "godot_path": "builds/web",
43
+ "max_episode_steps": 5000,
44
+ "resume": false,
45
+ "checkpoint_path": "checkpoints/maskableppo_utdg_100000_steps.zip"
46
+ },
47
+ "server": {
48
+ "enabled": false,
49
+ "websocket_routes": {
50
+ "ui": "/ws",
51
+ "godot": "/godot"
52
+ }
53
+ },
54
+ "env": {
55
+ "observation_space": {
56
+ "include_enemy_health": true,
57
+ "include_tower_stats": true,
58
+ "grid_resolution": 32,
59
+ "normalize": true
60
+ },
61
+ "action_space": {
62
+ "type": "discrete",
63
+ "max_towers": 10
64
+ },
65
+ "episode": {
66
+ "max_episode_steps": 5000,
67
+ "truncate_on_life_lost": false,
68
+ "starting_gold": 150,
69
+ "base_health": 10
70
+ }
71
+ },
72
+ "agent": {
73
+ "type": "maskable_ppo",
74
+ "deterministic": true
75
+ },
76
+ "model": {
77
+ "policy": "MaskableActorCriticPolicy",
78
+ "learning_rate": 0.0003,
79
+ "gamma": 0.99,
80
+ "batch_size": 64,
81
+ "n_steps": 2048
82
+ },
83
+ "training": {
84
+ "total_timesteps": 100000,
85
+ "device": "auto",
86
+ "log_interval": 2048,
87
+ "progress_bar": true,
88
+ "verbose": 1
89
+ },
90
+ "checkpoint": {
91
+ "enabled": true,
92
+ "save_path": "checkpoints",
93
+ "save_freq": 10000,
94
+ "save_best_only": true,
95
+ "keep_last": 3,
96
+ "name_prefix": "model_policy",
97
+ "save_replay_buffer": false,
98
+ "save_vecnormalize": false
99
+ },
100
+ "callbacks": {
101
+ "wandb": {
102
+ "enabled": true,
103
+ "project": "utdg",
104
+ "entity": "rl4aa",
105
+ "run_name": null,
106
+ "tags": [],
107
+ "mode": "online",
108
+ "save_code": true,
109
+ "eval_enabled": false
110
+ },
111
+ "hf_upload": {
112
+ "enabled": true,
113
+ "repo_id": "chrisjcc/utdg-maskableppo-policy",
114
+ "private": true,
115
+ "repo_type": "model",
116
+ "token": null,
117
+ "metadata": {
118
+ "task": "reinforcement-learning",
119
+ "algorithm": "MaskablePPO",
120
+ "game": "Untitled Tower Defense Game"
121
+ },
122
+ "push_strategy": "final",
123
+ "local_model_path": "",
124
+ "upload_freq": 10000,
125
+ "commit_message": "Upload model checkpoint",
126
+ "lfs": {
127
+ "use_lfs": true,
128
+ "files": [
129
+ "*.zip",
130
+ "*.onnx"
131
+ ]
132
+ }
133
+ }
134
+ },
135
+ "experiment": {
136
+ "name": "utdg_experiment",
137
+ "seed": 42,
138
+ "log_dir": "logs"
139
+ },
140
+ "logging": {
141
+ "level": "INFO",
142
+ "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
143
+ }
144
+ }
145
  ---
146
 
147
+ # UTDG MaskablePPO Agent
148
 
149
+ This repository contains a trained agent for the Untitled Tower Defense Game.
150
 
151
+ ## Contents
152
+ - `final_model.zip` — final SB3 checkpoint
153
+ - Hydra configuration snapshot
154
+ - Training metadata
 
 
155
 
156
  ## Usage
157
+
158
  ```python
 
159
  from sb3_contrib import MaskablePPO
160
+ model = MaskablePPO.load("final_model.zip")  # local path to final_model.zip downloaded from chrisjcc/utdg-maskableppo-policy
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
final_model.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27562bcd845fd3bef8088b55f540a647dddbf78d368608e4119361cff8a8005f
3
+ size 646314