Update RL Model: Add/Update model 'PPO-CartPole-v1-ep110' and regenerate unified README.md

Browse files

Files changed (3) hide show

PPO-CartPole-v1-ep110/group1-shard1of1.bin +3 -0
PPO-CartPole-v1-ep110/model.json +1 -0
README.md +6 -6

PPO-CartPole-v1-ep110/group1-shard1of1.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a3e428b23edb19b41eb024f7ae8ae37723c3ff5e4fafd9068723ee2cf24d45e
+size 69640

PPO-CartPole-v1-ep110/model.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"modelTopology":{"class_name":"Sequential","config":{"name":"sequential_1","layers":[{"class_name":"Dense","config":{"units":128,"activation":"tanh","use_bias":true,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"normal","seed":null}},"bias_initializer":{"class_name":"Zeros","config":{}},"kernel_regularizer":null,"bias_regularizer":null,"activity_regularizer":null,"kernel_constraint":null,"bias_constraint":null,"name":"dense_Dense1","trainable":true,"batch_input_shape":[null,4],"dtype":"float32"}},{"class_name":"Dense","config":{"units":128,"activation":"tanh","use_bias":true,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"normal","seed":null}},"bias_initializer":{"class_name":"Zeros","config":{}},"kernel_regularizer":null,"bias_regularizer":null,"activity_regularizer":null,"kernel_constraint":null,"bias_constraint":null,"name":"dense_Dense2","trainable":true}},{"class_name":"Dense","config":{"units":2,"activation":"softmax","use_bias":true,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"normal","seed":null}},"bias_initializer":{"class_name":"Zeros","config":{}},"kernel_regularizer":null,"bias_regularizer":null,"activity_regularizer":null,"kernel_constraint":null,"bias_constraint":null,"name":"dense_Dense3","trainable":true}}]},"keras_version":"tfjs-layers 4.22.0","backend":"tensor_flow.js"},"weightsManifest":[{"paths":["group1-shard1of1.bin"],"weights":[{"name":"dense_Dense1/kernel","shape":[4,128],"dtype":"float32"},{"name":"dense_Dense1/bias","shape":[128],"dtype":"float32"},{"name":"dense_Dense2/kernel","shape":[128,128],"dtype":"float32"},{"name":"dense_Dense2/bias","shape":[128],"dtype":"float32"},{"name":"dense_Dense3/kernel","shape":[128,2],"dtype":"float32"},{"name":"dense_Dense3/bias","shape":[2],"dtype":"float32"}]}],"agenlusMetadata":{"algorithm":"PPO","episodes":110,"hyperparams":{"gamma":0.99,"lambdaGae":0.95,"clipEpsilon":0.2,"entropyCoef":0.01,"learningRate":0.0003,"epochs":4,"rolloutLen":512,"currentEpsilon":null,"currentAlpha":null},"seed":448565,"seedPinned":true,"recentScore":82}}

README.md CHANGED Viewed

@@ -14,22 +14,22 @@ Welcome to your Agenlus Reinforcement Learning repository! This repository hosts
 | Model Name | Environment | Algorithm | Best Score | Episodes | Links |
 | :--- | :--- | :--- | :--- | :--- | :--- |
-| **PPO-CartPole-v1-ep103** | `CartPole-v1` | `PPO` | **47.50** | 103 | [Browse Files](https://huggingface.co/umjunsik1323/RL_Models/tree/main/PPO-CartPole-v1-ep103) |
 ## 📝 Model Details & Instructions
-### 📦 PPO-CartPole-v1-ep103
 *   **Environment:** `CartPole-v1`
 *   **RL Algorithm:** `PPO`
-*   **Best Avg Reward:** `47.50`
-*   **Episodes Trained:** `103`
 **Description:**
-PPO model trained on CartPole-v1 for 103 episodes. Best avg reward: 47.50.
 **How to load:**
 ```javascript
-const model = await tf.loadLayersModel('https://huggingface.co/umjunsik1323/RL_Models/raw/main/PPO-CartPole-v1-ep103/model.json');
 ```
 ---

 | Model Name | Environment | Algorithm | Best Score | Episodes | Links |
 | :--- | :--- | :--- | :--- | :--- | :--- |
+| **PPO-CartPole-v1-ep110** | `CartPole-v1` | `PPO` | **58.59** | 110 | [Browse Files](https://huggingface.co/umjunsik1323/RL_Models/tree/main/PPO-CartPole-v1-ep110) |
 ## 📝 Model Details & Instructions
+### 📦 PPO-CartPole-v1-ep110
 *   **Environment:** `CartPole-v1`
 *   **RL Algorithm:** `PPO`
+*   **Best Avg Reward:** `58.59`
+*   **Episodes Trained:** `110`
 **Description:**
+PPO model trained on CartPole-v1 for 110 episodes. Best avg reward: 58.59.
 **How to load:**
 ```javascript
+const model = await tf.loadLayersModel('https://huggingface.co/umjunsik1323/RL_Models/resolve/main/PPO-CartPole-v1-ep110/model.json');
 ```
 ---