ArseniyPerchik committed
Commit a853d77 · 1 Parent(s): fbd53e3
.gitignore CHANGED
@@ -27,4 +27,5 @@ my_folder
 results
 test-trainer
 .gradio
-secrets.txt
+secrets.txt
+ppo_tensorboard
draft_2.py CHANGED
@@ -1,13 +1,27 @@
-import numpy as np
+import gymnasium as gym

-# angle_deg = 350 # for example
-# angle_rad = np.deg2rad(angle_deg)
-#
-# vector = np.array([np.cos(angle_rad), np.sin(angle_rad)])
-# print(vector)
+from stable_baselines3 import PPO
+from stable_baselines3.common.env_util import make_vec_env
+import torch

-input_angle = 0.5
-angle_rad = 2 * np.pi * input_angle
-vector_2 = np.array([np.cos(angle_rad), np.sin(angle_rad)])
-print(vector_2)
+# Parallel environments
+vec_env = make_vec_env("CartPole-v1", n_envs=4)

+policy_kwargs = dict(activation_fn=torch.nn.ReLU,
+                     net_arch=dict(pi=[32, 32], vf=[32, 32]))
+model = PPO("MlpPolicy", vec_env,
+            verbose=1,
+            policy_kwargs=policy_kwargs,
+            tensorboard_log="./ppo_tensorboard/")
+model.learn(total_timesteps=100000, tb_log_name="CartPole")
+model.save("ppo_cartpole")
+
+del model  # remove to demonstrate saving and loading
+
+model = PPO.load("ppo_cartpole")
+
+obs = vec_env.reset()
+while True:
+    action, _states = model.predict(obs)
+    obs, rewards, dones, info = vec_env.step(action)
+    vec_env.render("human")
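The rewritten draft_2.py is the standard Stable-Baselines3 PPO quickstart: train on four parallel CartPole-v1 envs, save, reload, and roll the policy out in an endless render loop. Before trusting a reloaded checkpoint, it can be scored with evaluate_policy; a minimal sketch, assuming the ppo_cartpole.zip saved above exists on disk:

import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

# Rebuild the same vectorized env and attach it to the reloaded checkpoint
vec_env = make_vec_env("CartPole-v1", n_envs=4)
model = PPO.load("ppo_cartpole", env=vec_env)

# evaluate_policy rolls out n_eval_episodes episodes and returns the
# mean/std of episodic reward; deterministic=True disables action sampling
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=10, deterministic=True)
print(f"mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")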
globals.py ADDED
@@ -0,0 +1,11 @@
+import matplotlib.pyplot as plt
+import matplotlib
+from matplotlib.patches import Circle
+import math
+import gymnasium as gym
+import numpy as np
+from gymnasium import spaces
+from stable_baselines3.common.env_checker import check_env
+from stable_baselines3 import PPO
+from stable_baselines3.common.env_util import make_vec_env
+import torch
good_policies/sac_warehouse_r_10_working_v1.zip ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfd9ad5aea06290c82070061ad1c15f77369e2ce0ada6d2893af143301b38f19
+size 105325
good_policies/sac_warehouse_r_20.zip ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39aca94cf39aaf179a4cc9189882b18450c6eb90d61708899a87949e8bd78792
+size 105325
plot_functions.py CHANGED
@@ -1,5 +1,4 @@
-import matplotlib.pyplot as plt
-import matplotlib
+from globals import *



@@ -7,7 +6,17 @@ import matplotlib
 def plot_env(ax, info):
     ax.cla()
     env = info['env']
-    ax.plot([1, 1], [1, 2], '.', color='b', alpha=0.5, linewidth=5, markersize=20)
+    agent_x, agent_y = env.agent_x, env.agent_y
+    goal_x, goal_y = env.goal_x, env.goal_y
+
+    # agent
+    ax.plot([agent_x], [agent_y], marker='o', color='b', alpha=0.5, linewidth=5, markersize=15)
+
+    # target
+    ax.plot([goal_x], [goal_y], marker='X', color='orange', alpha=0.5, linewidth=5, markersize=15)
+    circle = Circle((goal_x, goal_y), env.RADIUS_COVERAGE, color='orange', fill=True, alpha=0.3)
+    ax.add_patch(circle)
+
     # ax.set_xlim([min(n_agents_list) - 20, max(n_agents_list) + 20])
     ax.set_xlim([0, 100])
     ax.set_ylim([0, 100])
@@ -16,7 +25,7 @@ def plot_env(ax, info):
     # ax.set_ylabel('Success Rate', fontsize=27)
     # ax.set_title(f'{img_dir[:-4]} Map | time limit: {time_to_think_limit} sec.')
     # set_plot_title(ax, f'{img_dir[:-4]} Map | time limit: {time_to_think_limit} sec.', size=11)
-    ax.set_title(f'Warehouse', fontweight="bold", size=30)
+    ax.set_title(f'Warehouse Env | step {env.step_counter}', fontweight="bold", size=10)
     # set_legend(ax, size=18)
     # labelsize = 20
     # ax.xaxis.set_tick_params(labelsize=labelsize)
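plot_env now reads live state off the env: a blue dot for the agent, an orange X for the goal, and a filled circle of radius env.RADIUS_COVERAGE marking the success zone. A minimal standalone sketch for drawing one frame onto caller-owned axes, assuming render_mode='' keeps WarehouseEnv from opening its own figure (as in train_func):

import matplotlib.pyplot as plt

from plot_functions import plot_env
from warehouse_env import WarehouseEnv

env = WarehouseEnv(render_mode='')
obs, info = env.reset()

# Draw a single frame of the reset state
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
plot_env(ax, info={'env': env})
plt.show()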
register_env.py ADDED
@@ -0,0 +1,12 @@
+from gymnasium.envs.registration import register
+from warehouse_env import WarehouseEnv
+# Register the custom warehouse environment with Gymnasium
+register(
+    # unique identifier for the env `name-version`
+    id="WarehouseEnv",
+    # path to the class for creating the env
+    # Note: entry_point also accepts a class as input (and not only a string)
+    entry_point=WarehouseEnv,
+    # Max number of steps per episode, using a `TimeLimit` wrapper
+    max_episode_steps=500,
+)
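With the registration in place, the env can be built through the standard Gymnasium factory, which also applies the 500-step TimeLimit wrapper on top of the env's own MAX_STEPS counter. A minimal usage sketch; passing render_mode='' (as train_func does) is an assumption, since gym.make forwards extra kwargs to the WarehouseEnv constructor:

import gymnasium as gym

import register_env  # noqa: F401 - importing runs register() as a side effect

env = gym.make("WarehouseEnv", render_mode='')
obs, info = env.reset()
print(env.spec.id, obs.shape)  # WarehouseEnv (4,)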
train_agent.py ADDED
@@ -0,0 +1,69 @@
+from warehouse_env import *
+from stable_baselines3 import SAC
+
+
+def train_func(alg_name='PPO'):
+    env = WarehouseEnv(render_mode='')
+
+    if alg_name == 'PPO':
+        # PPO
+        policy_kwargs = dict(activation_fn=torch.nn.ReLU,
+                             net_arch=dict(pi=[64, 64], vf=[64, 64]))
+        model = PPO("MlpPolicy", env,
+                    verbose=1,
+                    policy_kwargs=policy_kwargs,
+                    tensorboard_log="./ppo_tensorboard/",
+                    # learning_rate=0.0003,
+                    # clip_range=0.1,
+                    )
+        model.learn(total_timesteps=500000, tb_log_name="WarehouseEnv")
+        model.save("ppo_warehouse")
+
+    elif alg_name == 'SAC':
+        # policy_kwargs = dict(net_arch=dict(pi=[256, 256], qf=[400, 300]))
+        # policy_kwargs = dict(net_arch=[512, 512])  # Two shared hidden layers
+        policy_kwargs = dict(net_arch=[32, 32])  # Two shared hidden layers
+        model = SAC("MlpPolicy", env, verbose=1,
+                    tensorboard_log="./ppo_tensorboard/",
+                    # learning_rate=0.0003,
+                    policy_kwargs=policy_kwargs,
+                    )
+        model.learn(total_timesteps=700000, log_interval=4, tb_log_name="sac_WarehouseEnv")
+        model.save("sac_warehouse")
+
+    else:
+        raise RuntimeError('no model')
+
+
+
+
+def exec_func(alg_name='PPO', model_name=None):
+    env = WarehouseEnv(render_mode='human')
+    if alg_name == 'PPO':
+        model_name = "ppo_warehouse" if model_name is None else model_name
+        model = PPO.load(model_name)
+    elif alg_name == 'SAC':
+        model_name = "sac_warehouse" if model_name is None else model_name
+        model = SAC.load(model_name)
+    else:
+        raise RuntimeError('no model')
+    # vec_env = model.get_env()
+    obs, info = env.reset()
+    while True:
+        action, _states = model.predict(obs)
+        obs, rewards, done, trunc, info = env.step(action)
+        env.render()
+        if done or trunc:
+            obs, info = env.reset()
+
+
+def main():
+    # alg_name = 'PPO'
+    alg_name = 'SAC'
+    model_name = 'sac_warehouse_working_v1'
+    # train_func(alg_name)
+    exec_func(alg_name=alg_name, model_name=model_name)
+
+
+if __name__ == '__main__':
+    main()
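exec_func renders forever and samples actions stochastically (SAC's predict samples from the policy by default). For scoring a checkpoint it is common to act deterministically and count episode outcomes instead; a hedged sketch reusing the names from this diff (the 100-episode success counter is illustrative, not part of the commit):

from stable_baselines3 import SAC

from warehouse_env import WarehouseEnv

env = WarehouseEnv(render_mode='')
model = SAC.load("sac_warehouse")

wins, episodes = 0, 0
obs, info = env.reset()
while episodes < 100:
    # deterministic=True uses the policy mean instead of sampling
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, trunc, info = env.step(action)
    if done or trunc:
        episodes += 1
        wins += reward == 20  # build_reward() returns 20 on reaching the goal
        obs, info = env.reset()
print(f"success rate: {wins / episodes:.2%}")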
warehouse_env.py CHANGED
@@ -1,13 +1,7 @@
1
- import math
2
- import gymnasium as gym
3
- import numpy as np
4
- from gymnasium import spaces
5
- from stable_baselines3.common.env_checker import check_env
6
- from stable_baselines3 import PPO
7
- from stable_baselines3.common.env_util import make_vec_env
8
  from plot_functions import *
9
 
10
 
 
11
  class WarehouseEnv(gym.Env):
12
  """
13
  WarehouseEnv Environment that follows gym interface.
@@ -32,11 +26,12 @@ class WarehouseEnv(gym.Env):
32
  self.ACTIONS: int = 2
33
  self.N_CHANNELS: int = 4
34
  self.SIDE: int = 100
35
- self.RADIUS_COVERAGE: int = 5
36
- self.MAX_STEPS: int = 200
 
37
  self.DIAG: float = math.sqrt(self.SIDE ** 2 + self.SIDE ** 2)
38
  self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.ACTIONS,), dtype=np.float32)
39
- self.observation_space = spaces.Box(low=-1, high=1, shape=(self.N_CHANNELS,), dtype=np.float64)
40
  self.field = np.zeros((self.SIDE, self.SIDE))
41
 
42
  # Agent
@@ -50,7 +45,7 @@ class WarehouseEnv(gym.Env):
50
 
51
  # to render
52
  if self.to_render:
53
- self.fig, self.ax = plt.subplots(2, 2, figsize=(17, 10))
54
 
55
  @property
56
  def rel_x(self) -> int:
@@ -63,14 +58,45 @@ class WarehouseEnv(gym.Env):
63
  def reset(self, seed=None, options=None):
64
  self.agent_x = np.random.uniform(0, self.SIDE)
65
  self.agent_y = np.random.uniform(0, self.SIDE)
 
 
66
  self.goal_x = np.random.uniform(0, self.SIDE)
67
  self.goal_y = np.random.uniform(0, self.SIDE)
68
  self.step_counter = 0
69
  self.terminated = False
70
  self.truncated = False
71
- observation = np.array([self.agent_x / self.SIDE, self.agent_y / self.SIDE, self.rel_x / self.SIDE, self.rel_y / self.SIDE])
72
  info = {}
73
- return observation, info
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  def step(self, action):
76
  if self.terminated:
@@ -87,34 +113,37 @@ class WarehouseEnv(gym.Env):
87
  self.agent_x += input_vel * mov_x
88
  self.agent_y += input_vel * mov_y
89
 
90
- rel_x, rel_y = self.rel_x, self.rel_y
91
- distance = math.sqrt(rel_x**2 + rel_y**2)
92
-
93
- # obs
94
- observation = np.array([self.agent_x / self.SIDE, self.agent_y / self.SIDE, rel_x / self.SIDE, rel_y / self.SIDE])
95
-
96
 
97
  # terminated + reward
98
- if not (0 <= self.agent_x < self.SIDE) or not (0 <= self.agent_y < self.SIDE):
99
- self.terminated = True
100
- reward = -10
101
- elif distance < self.RADIUS_COVERAGE:
102
- self.terminated = True
103
- reward = 10
104
- else:
105
- reward = - (distance / self.DIAG)
 
 
 
 
106
 
107
  # truncated
108
  if self.step_counter > self.MAX_STEPS:
 
109
  self.truncated = True
110
  self.step_counter += 1
111
 
112
  # info
113
  info = {}
114
- return observation, reward, self.terminated, self.truncated, info
115
 
116
  def render(self):
117
- plot_env(self.ax[0, 0], info={'env': self})
118
  plt.tight_layout()
119
  plt.pause(0.01)
120
 
@@ -123,24 +152,9 @@ class WarehouseEnv(gym.Env):
123
 
124
 
125
  def main():
126
- env = WarehouseEnv(render_mode='human')
127
  # It will check your custom environment and output additional warnings if needed
128
- # check_env(env)
129
-
130
- # vec_env = make_vec_env(env, n_envs=4)
131
- # model = PPO("MlpPolicy", env, verbose=1)
132
- # model.learn(total_timesteps=25000)
133
- # model.save("ppo_warehouse")
134
- #
135
- # del model # remove to demonstrate saving and loading
136
-
137
- model = PPO.load("ppo_warehouse")
138
- vec_env = model.get_env()
139
- obs, info = env.reset()
140
- while True:
141
- action, _states = model.predict(obs)
142
- obs, rewards, done, trunc, info = env.step(action)
143
- env.render()
144
 
145
 
146
  if __name__ == '__main__':
 
 
 
 
 
 
 
 
1
  from plot_functions import *
2
 
3
 
4
+
5
  class WarehouseEnv(gym.Env):
6
  """
7
  WarehouseEnv Environment that follows gym interface.
 
26
  self.ACTIONS: int = 2
27
  self.N_CHANNELS: int = 4
28
  self.SIDE: int = 100
29
+ # self.RADIUS_COVERAGE: int = 20 # working v1
30
+ self.RADIUS_COVERAGE: int = 10
31
+ self.MAX_STEPS: int = 500
32
  self.DIAG: float = math.sqrt(self.SIDE ** 2 + self.SIDE ** 2)
33
  self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.ACTIONS,), dtype=np.float32)
34
+ self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.N_CHANNELS,), dtype=np.float64)
35
  self.field = np.zeros((self.SIDE, self.SIDE))
36
 
37
  # Agent
 
45
 
46
  # to render
47
  if self.to_render:
48
+ self.fig, self.ax = plt.subplots(1, 1, figsize=(5, 5))
49
 
50
  @property
51
  def rel_x(self) -> int:
 
58
  def reset(self, seed=None, options=None):
59
  self.agent_x = np.random.uniform(0, self.SIDE)
60
  self.agent_y = np.random.uniform(0, self.SIDE)
61
+ # self.agent_x = 50.0
62
+ # self.agent_y = 50.0
63
  self.goal_x = np.random.uniform(0, self.SIDE)
64
  self.goal_y = np.random.uniform(0, self.SIDE)
65
  self.step_counter = 0
66
  self.terminated = False
67
  self.truncated = False
 
68
  info = {}
69
+ return self.build_obs(), info
70
+
71
+ def build_obs(self):
72
+ observation = np.array([
73
+ self.agent_x / self.SIDE * 2 - 1,
74
+ self.agent_y / self.SIDE * 2 - 1,
75
+ self.rel_x / self.SIDE * 2 - 1,
76
+ self.rel_y / self.SIDE * 2 - 1
77
+ # self.goal_x / self.SIDE * 2 - 1,
78
+ # self.goal_y / self.SIDE * 2 - 1
79
+ ])
80
+ return observation
81
+
82
+ def build_reward(self):
83
+ rel_x, rel_y = self.rel_x, self.rel_y
84
+ # rel_x = self.agent_x - self.goal_x
85
+ # rel_y = self.agent_y - self.goal_y
86
+ distance = math.sqrt(rel_x ** 2 + rel_y ** 2)
87
+
88
+ # terminated + reward
89
+ # if not (0 < self.agent_x < self.SIDE) or not (0 <= self.agent_y < self.SIDE):
90
+ if distance < self.RADIUS_COVERAGE:
91
+ self.terminated = True
92
+ self.truncated = True
93
+ return 20
94
+ elif self.agent_x < 0 or self.agent_x > self.SIDE or self.agent_y < 0 or self.agent_y > self.SIDE:
95
+ self.terminated = True
96
+ self.truncated = True
97
+ return -10
98
+ # return -1 * (distance / self.DIAG)
99
+ return -0.001
100
 
101
  def step(self, action):
102
  if self.terminated:
 
113
  self.agent_x += input_vel * mov_x
114
  self.agent_y += input_vel * mov_y
115
 
116
+ # rel_x, rel_y = self.rel_x, self.rel_y
117
+ # rel_x = self.agent_x - self.goal_x
118
+ # rel_y = self.agent_y - self.goal_y
119
+ # distance = math.sqrt(rel_x**2 + rel_y**2)
 
 
120
 
121
  # terminated + reward
122
+ # if not (0 < self.agent_x < self.SIDE) or not (0 <= self.agent_y < self.SIDE):
123
+ # if distance < self.RADIUS_COVERAGE:
124
+ # self.terminated = True
125
+ # self.truncated = True
126
+ # reward = 2
127
+ # print('Win')
128
+ # elif self.agent_x < 0 or self.agent_x > self.SIDE or self.agent_y < 0 or self.agent_y > self.SIDE:
129
+ # self.terminated = True
130
+ # self.truncated = True
131
+ # reward = -2
132
+ # else:
133
+ # reward = -1 * (distance / self.DIAG)
134
 
135
  # truncated
136
  if self.step_counter > self.MAX_STEPS:
137
+ # self.terminated = True
138
  self.truncated = True
139
  self.step_counter += 1
140
 
141
  # info
142
  info = {}
143
+ return self.build_obs(), self.build_reward(), self.terminated, self.truncated, info
144
 
145
  def render(self):
146
+ plot_env(self.ax, info={'env': self})
147
  plt.tight_layout()
148
  plt.pause(0.01)
149
 
 
152
 
153
 
154
  def main():
155
+ env = WarehouseEnv(render_mode='')
156
  # It will check your custom environment and output additional warnings if needed
157
+ check_env(env)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
 
160
  if __name__ == '__main__':
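One detail worth flagging in the new build_obs(): agent_x / SIDE lies in [0, 1], so the * 2 - 1 rescale correctly maps it to [-1, 1], but rel_x / SIDE already spans [-1, 1] (a signed offset between two points in a SIDE-sized square), so rel_x / SIDE * 2 - 1 can reach -3 and fall outside the declared Box(low=-1.0, high=1.0) observation space; the check_env(env) call in main() only flags this if a sampled episode happens to hit such a value. A bounds-respecting variant is a small change; a sketch, assuming rel_x/rel_y are signed agent-goal offsets:

import numpy as np

def build_obs(self):
    return np.array([
        self.agent_x / self.SIDE * 2 - 1,  # [0, SIDE] -> [-1, 1]
        self.agent_y / self.SIDE * 2 - 1,  # [0, SIDE] -> [-1, 1]
        self.rel_x / self.SIDE,            # [-SIDE, SIDE] -> [-1, 1], no shift needed
        self.rel_y / self.SIDE,
    ])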