Fetch-Reinforcement_learning_Project

Sleeping

App Files Community

gkemp181 committed on Apr 30, 2025

Commit

07bd4cd

1 Parent(s): 479bef5

Added model selection to app

Browse files

Files changed (11) hide show

App/model/{model.zip → pick_and_place_dense.zip} +2 -2
App/model/pick_and_place_her.zip +3 -0
App/model/push.zip +3 -0
App/model/reach.zip +3 -0
__pycache__/app.cpython-311.pyc +0 -0
__pycache__/custom_env.cpython-311.pyc +0 -0
app.py +60 -31
custom_env.py +53 -41
app_test_2.py → old_apps/app_test_2.py +0 -0
app_test_3.py → old_apps/app_test_3.py +0 -0
app_test_4.py → old_apps/app_test_4.py +0 -0

App/model/{model.zip → pick_and_place_dense.zip} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d257f9937f3914c65c7ad21a2c25d601862ffc7d0ede4a7c6d3270fc04db2eec
-size 3372650

 version https://git-lfs.github.com/spec/v1
+oid sha256:8582301dcbe21ded7d266bd5548a629d76f603ceeb44995af657a3b5b322295a
+size 3377664

App/model/pick_and_place_her.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab787b78fb54a6ee447bfd046248a1217a6e3207633e6753a2824282af3c08ad
+size 3379264

App/model/push.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9953fc1dfd1c19b9faa56d898cbc985790468b41c46c530f797e5b7f56106715
+size 3377665

App/model/reach.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51a4a2ae881f240be42ff6cae71e54c2a0487d5b083cacd52e346359d6fbb139
+size 3207511

__pycache__/app.cpython-311.pyc ADDED Viewed

Binary file (4.31 kB). View file

__pycache__/custom_env.cpython-311.pyc ADDED Viewed

Binary file (4.68 kB). View file

app.py CHANGED Viewed

@@ -1,7 +1,4 @@
-# <-- this must come first, before any mujoco / gym imports
 import os
-os.environ["MUJOCO_GL"] = "osmesa"
 import gradio as gr
 import numpy as np
 import torch
@@ -9,49 +6,81 @@ import imageio
 from stable_baselines3 import SAC
 from custom_env import create_env
-# Define the function that runs the model and outputs a video
-def run_model_episode(x_start, y_start, x_targ, y_targ, z_targ):
-    # Create environment with user inputs
-    env = create_env(render_mode="rgb_array",
-                     block_xy=(x_start, y_start),
-                     goal_xyz=(x_targ, y_targ, z_targ))
-    # Load your trained model
-    checkpoint_path = os.path.join("App", "model", "model.zip")
-    model = SAC.load(checkpoint_path, env=env, verbose=1)
-    # Rollout the episode
     frames = []
     obs, info = env.reset()
-    for _ in range(200):  # Shorter rollout
         action, _ = model.predict(obs, deterministic=True)
         obs, reward, done, trunc, info = env.step(action)
-        frame = env.render()
-        frames.append(frame)
         if done or trunc:
             obs, info = env.reset()
     env.close()
-    # Save frames into a video
     video_path = "run_video.mp4"
     imageio.mimsave(video_path, frames, fps=30)
     return video_path
-# --------------------------------------
-# Build the Gradio App
-# --------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("## Fetch Robot: Model Demo App")
-    gr.Markdown("Enter start and target coordinates, then click 'Run Model' to watch the robot!")
     gr.Markdown("Coordinates are relative to the center of the table.")
-    gr.Markdown("X and Y coordinates are in meters, Z coordinate is height in meters.")
-    gr.Markdown("0,0,0 is the center of the table.")
     with gr.Row():
         x_start = gr.Number(label="Start X", value=0.0)
@@ -62,14 +91,14 @@ with gr.Blocks() as demo:
         y_targ = gr.Number(label="Target Y", value=0.1)
         z_targ = gr.Number(label="Target Z", value=0.1)
-    run_button = gr.Button("Run Model")
     output_video = gr.Video()
     run_button.click(
         fn=run_model_episode,
-        inputs=[x_start, y_start, x_targ, y_targ, z_targ],
         outputs=output_video
     )
 demo.launch(share=True)

 import os
 import gradio as gr
 import numpy as np
 import torch
 from stable_baselines3 import SAC
 from custom_env import create_env
+# Update your run function to accept a model_name
+def run_model_episode(x_start, y_start, x_targ, y_targ, z_targ, model_name, random_coords):
+    """Roll out the selected SAC checkpoint for 200 steps and save a video.
+
+    Args:
+        x_start, y_start: Requested block start position. Ignored when
+            ``random_coords`` is truthy.
+        x_targ, y_targ, z_targ: Requested goal position. ``z_targ`` is
+            forced to 0.0 for the Push environment. Ignored when
+            ``random_coords`` is truthy.
+        model_name: Radio choice; must be a key of the lookup table below.
+            It selects both the checkpoint file and the environment id.
+        random_coords: When truthy, ``None`` is passed to ``create_env`` for
+            both positions instead of the coordinates above.
+
+    Returns:
+        Path of the written video file ("run_video.mp4").
+
+    Raises:
+        KeyError: If ``model_name`` is not one of the known choices.
+    """
+    # map the radio-choice to (checkpoint on disk, environment name)
+    models = {
+        "Pick & Place (HER)": ("App/model/pick_and_place_her.zip", "FetchPickAndPlace-v3"),
+        "Pick & Place (Dense)": ("App/model/pick_and_place_dense.zip", "FetchPickAndPlaceDense-v3"),
+        "Push": ("App/model/push.zip", "FetchPush-v3"),
+        "Reach": ("App/model/reach.zip", "FetchReach-v3"),
+    }
+    checkpoint_path, environment = models[model_name]
+    # Handle environment coordinates
+    if random_coords:
+        # None means "do not pin this position" for create_env.
+        block_xy = None
+        goal_xyz = None
+    else:
+        if environment == "FetchPush-v3":
+            # Push always uses z = 0.0 for the goal, whatever the Z input is.
+            z_targ = 0.0
+        # Plain 2-tuple. A trailing comma after the parentheses would wrap
+        # the pair in an extra 1-tuple: ((x, y),) instead of (x, y).
+        block_xy = (x_start, y_start)
+        goal_xyz = (x_targ, y_targ, z_targ)
+    # create the env
+    env = create_env(
+        render_mode="rgb_array",
+        block_xy=block_xy,
+        goal_xyz=goal_xyz,
+        environment=environment,
+    )
+    try:
+        # load the selected model
+        model = SAC.load(checkpoint_path, env=env, verbose=0)
+        frames = []
+        obs, info = env.reset()
+        for _ in range(200):
+            action, _ = model.predict(obs, deterministic=True)
+            obs, reward, done, trunc, info = env.step(action)
+            frames.append(env.render())
+            # Start a new episode so the video always spans all 200 steps.
+            if done or trunc:
+                obs, info = env.reset()
+    finally:
+        # Release the environment even if loading or stepping raises.
+        env.close()
     video_path = "run_video.mp4"
     imageio.mimsave(video_path, frames, fps=30)
     return video_path
 with gr.Blocks() as demo:
     gr.Markdown("## Fetch Robot: Model Demo App")
+    gr.Markdown("Enter coordinates, pick a model, then click **Run Model**.")
     gr.Markdown("Coordinates are relative to the center of the table.")
+    # 1) add a radio (or gr.Dropdown) for model selection
+    model_selector = gr.Radio(
+        choices=["Pick & Place (HER)", "Pick & Place (Dense)", "Push", "Reach"],
+        value="Pick & Place (HER)",
+        label="Select a model/environment"
+    )
+    # Randomize coordinates
+    randomize = gr.Checkbox(
+        label="Use randomized coordinates?",
+        value=False
+    )
     with gr.Row():
         x_start = gr.Number(label="Start X", value=0.0)
         y_targ = gr.Number(label="Target Y", value=0.1)
         z_targ = gr.Number(label="Target Z", value=0.1)
+    run_button   = gr.Button("Run Model")
     output_video = gr.Video()
+    # 2) include the selector as an input to your click callback
     run_button.click(
         fn=run_model_episode,
+        inputs=[x_start, y_start, x_targ, y_targ, z_targ, model_selector, randomize],
         outputs=output_video
     )
 demo.launch(share=True)

custom_env.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # <-- this must come first, before any mujoco / gym imports
-import os
-os.environ["MUJOCO_GL"] = "osmesa"
 import numpy as np
 import gymnasium as gym
@@ -8,7 +8,7 @@ import gymnasium_robotics
 import mujoco
 class CustomFetchWrapper(gym.Wrapper):
-    def __init__(self, env, block_xy=None, goal_xyz=None):
         super().__init__(env)
         self.u = env.unwrapped  # MujocoFetchPickAndPlaceEnv
         # stash your fixed coords (or None to randomize)
@@ -16,6 +16,7 @@ class CustomFetchWrapper(gym.Wrapper):
                                  if block_xy is not None else None)
         self.default_goal_xyz = (np.array(goal_xyz, dtype=float)
                                  if goal_xyz is not None else None)
     def reset(self, *args, **kwargs):
         # 1) do the normal reset — gets you a random goal in obs
@@ -33,45 +34,49 @@ class CustomFetchWrapper(gym.Wrapper):
         ):
             utils.set_joint_qpos(model, data, name, val)
-        # 3) pick block position
-        if self.default_block_xy is None:
-            # — original random‐sampling —
-            home_xy   = u.initial_gripper_xpos[:2]
-            obj_range = u.obj_range
-            min_dist  = u.distance_threshold
-            while True:
-                offset = rng.uniform(-obj_range, obj_range, size=2)
-                if np.linalg.norm(offset) < min_dist:
-                    continue
-                cand = home_xy + offset
-                if np.linalg.norm(cand - obs["desired_goal"][:2]) < min_dist:
-                    continue
-                break
-            block_xy = cand
-        else:
-            block_xy = self.default_block_xy
-        # place the block
-        blk_qpos = utils.get_joint_qpos(model, data, "object0:joint")
-        blk_qpos[0:2] = block_xy
-        blk_qpos[2]    = 0.42  # table height
-        utils.set_joint_qpos(model, data, "object0:joint", blk_qpos)
         # 4) pick goal position
-        if self.default_goal_xyz is None:
-            # — original “raise above table” logic —
-            raise_z = 0.1 + rng.uniform(0, 0.2)
-            new_goal = obs["desired_goal"].copy()
-            new_goal[2] = blk_qpos[2] + raise_z
-        else:
             new_goal = self.default_goal_xyz
-        # override the goal both in the env and in the MuJoCo site
-        u.goal = new_goal
-        sid = mujoco.mj_name2id(model,
-                                mujoco.mjtObj.mjOBJ_SITE,
-                                "target0")
-        data.site_xpos[sid] = new_goal
         # 5) forward‐kinematics + fresh obs
         u._mujoco.mj_forward(model, data)
@@ -80,9 +85,15 @@ class CustomFetchWrapper(gym.Wrapper):
         return obs, info
-def create_env(render_mode=None, block_xy=None, goal_xyz=None):
     gym.register_envs(gymnasium_robotics)
-    base_env = gym.make("FetchPickAndPlace-v3", render_mode=render_mode)
     u = base_env.unwrapped
     # 1) compute table center in world coords
@@ -110,6 +121,7 @@ def create_env(render_mode=None, block_xy=None, goal_xyz=None):
     env = CustomFetchWrapper(
         base_env,
         block_xy=abs_block_xy,
-        goal_xyz=abs_goal_xyz
     )
     return env

 # <-- this must come first, before any mujoco / gym imports
+# import os
+# os.environ["MUJOCO_GL"] = "osmesa"
 import numpy as np
 import gymnasium as gym
 import mujoco
 class CustomFetchWrapper(gym.Wrapper):
+    def __init__(self, env, block_xy=None, goal_xyz=None, object=True):
         super().__init__(env)
         self.u = env.unwrapped  # MujocoFetchPickAndPlaceEnv
         # stash your fixed coords (or None to randomize)
                                  if block_xy is not None else None)
         self.default_goal_xyz = (np.array(goal_xyz, dtype=float)
                                  if goal_xyz is not None else None)
+        self.object = object
     def reset(self, *args, **kwargs):
         # 1) do the normal reset — gets you a random goal in obs
         ):
             utils.set_joint_qpos(model, data, name, val)
+        # pull out the actual goal so we can avoid it
+        goal_pos = obs["desired_goal"][:2].copy()
+        if (self.object==True):
+            # 3) pick block position
+            if self.default_block_xy is None:
+                home_xy  = u.initial_gripper_xpos[:2]
+                obj_range = u.obj_range
+                min_dist  = u.distance_threshold
+                while True:
+                    offset = rng.uniform(-obj_range, obj_range, size=2)
+                    # 3a) must be outside the “too-close to gripper” zone
+                    if np.linalg.norm(offset) < min_dist:
+                        continue
+                    candidate_xy = home_xy + offset
+                    # 3b) must be outside the “too-close to goal” zone
+                    if np.linalg.norm(candidate_xy - goal_pos) < min_dist:
+                        continue
+                    # if we get here, both checks passed
+                    break
+                block_xy = candidate_xy
+            else:
+                block_xy = self.default_block_xy
+            # place the block
+            blk_qpos = utils.get_joint_qpos(model, data, "object0:joint")
+            blk_qpos[0:2] = block_xy
+            blk_qpos[2]    = 0.42  # table height
+            utils.set_joint_qpos(model, data, "object0:joint", blk_qpos)
         # 4) pick goal position
+        if self.default_goal_xyz is not None:
             new_goal = self.default_goal_xyz
+            # override the goal both in the env and in the MuJoCo site
+            u.goal = new_goal
+            sid = mujoco.mj_name2id(model,
+                                    mujoco.mjtObj.mjOBJ_SITE,
+                                    "target0")
+            data.site_xpos[sid] = new_goal
         # 5) forward‐kinematics + fresh obs
         u._mujoco.mj_forward(model, data)
         return obs, info
+def create_env(render_mode=None, block_xy=None, goal_xyz=None, environment = "FetchPickAndPlace-v3"):
     gym.register_envs(gymnasium_robotics)
+    if(environment == "FetchReach-v3"):
+        object = False
+    else:
+        object = True
+    base_env = gym.make(environment, render_mode=render_mode)
     u = base_env.unwrapped
     # 1) compute table center in world coords
     env = CustomFetchWrapper(
         base_env,
         block_xy=abs_block_xy,
+        goal_xyz=abs_goal_xyz,
+        object=object
     )
     return env

app_test_2.py → old_apps/app_test_2.py RENAMED Viewed

File without changes

app_test_3.py → old_apps/app_test_3.py RENAMED Viewed

File without changes

app_test_4.py → old_apps/app_test_4.py RENAMED Viewed

File without changes