|
|
import os |
|
|
import time |
|
|
import math |
|
|
import random |
|
|
import numpy as np |
|
|
import gymnasium as gym |
|
|
from gymnasium import spaces |
|
|
|
|
|
import rclpy |
|
|
from rclpy.node import Node |
|
|
from rclpy.qos import QoSProfile, ReliabilityPolicy |
|
|
|
|
|
from geometry_msgs.msg import Twist |
|
|
from sensor_msgs.msg import LaserScan |
|
|
from nav_msgs.msg import Odometry |
|
|
|
|
|
import subprocess |
|
|
|
|
|
|
|
|
from sb3_contrib import RecurrentPPO |
|
|
from stable_baselines3.common.callbacks import CheckpointCallback |
|
|
from stable_baselines3.common.vec_env import DummyVecEnv |
|
|
from stable_baselines3.common.monitor import Monitor |
|
|
|
|
|
|
|
|
|
|
|
# --- Robot / world identity -------------------------------------------------
ROBOT_NAME = "bot1"
WORLD_NAME = "arena"

# --- ROS 2 topics, derived from the robot namespace -------------------------
CMD_VEL_TOPIC = "/" + ROBOT_NAME + "/cmd_vel"
SCAN_TOPIC = "/" + ROBOT_NAME + "/scan"
ODOM_TOPIC = "/" + ROBOT_NAME + "/odom"

# Arena extent in meters; target distance is normalized by 2 * ARENA_SIZE.
ARENA_SIZE = 4.5
|
|
|
|
|
|
|
|
class RcCarEnv(gym.Env):
    """Gymnasium environment driving a ROS 2 robot toward a target pose.

    Observation: ``n_obs`` min-pooled lidar bins plus 4 scalars
    (commanded speed, filtered steering, target distance, target bearing),
    all normalized into [0, 1] -> shape ``(n_obs + 4,)``.
    Action: ``Box(2,)`` in [-1, 1] -> [throttle, steering].
    Reward: progress toward the target (scaled by heading alignment),
    minus steering and per-step penalties; terminal bonuses/penalties for
    goal, collision, being stuck, and timeout.
    """

    def __init__(self):
        super().__init__()

        # Initialize rclpy only once so the env can be constructed repeatedly.
        if not rclpy.ok():
            rclpy.init()

        self.node = rclpy.create_node("rl_driver_node")

        # Best-effort QoS for sensors: dropping stale messages is preferable
        # to blocking the control loop on reliable delivery.
        qos = QoSProfile(
            reliability=ReliabilityPolicy.BEST_EFFORT,
            depth=10,
        )

        self.pub_cmd = self.node.create_publisher(Twist, CMD_VEL_TOPIC, 10)
        self.sub_scan = self.node.create_subscription(
            LaserScan, SCAN_TOPIC, self.scan_cb, qos
        )
        self.sub_odom = self.node.create_subscription(
            Odometry, ODOM_TOPIC, self.odom_cb, qos
        )

        print(f"📡 Bağlantı Başarılı: {ROBOT_NAME} (Sabit Ceza Modu)")

        # Actuation limits and the low-pass-filtered steering state.
        self.max_speed = 1.0        # m/s, forward only (throttle mapped to [0, max])
        self.max_steering = 0.6     # commanded angular.z limit
        self.current_steering = 0.0

        # Obstacle bookkeeping (positions presumably filled elsewhere — TODO confirm).
        self.obstacle_names = [f"box_{i}" for i in range(1, 11)]
        self.box_positions = []

        # Goal pose in the world frame; stays at the origin unless set externally.
        self.target_x = 0.0
        self.target_y = 0.0

        # Latest pose from odometry.
        self.robot_x = 0.0
        self.robot_y = 0.0
        self.robot_yaw = 0.0

        # Reward-shaping / termination state.
        self.prev_distance_to_target = None
        self.goal_reached = False

        # Lidar state.
        self.scan_data = None       # downsampled to n_obs bins (float32)
        self.raw_scan_data = None   # sanitized full-resolution scan
        self.scan_received = False
        self.n_obs = 60
        self.max_range = 10.0       # meters; clip and inf/nan replacement value

        # Episode bookkeeping.
        self.step_count = 0
        self.max_steps = 2500
        self.episode_count = 0
        self.stuck_counter = 0

        self.action_space = spaces.Box(
            low=np.array([-1.0, -1.0]),
            high=np.array([1.0, 1.0]),
            dtype=np.float32,
        )

        self.observation_space = spaces.Box(
            low=0.0,
            high=1.0,
            shape=(self.n_obs + 4,),
            dtype=np.float32,
        )

    def odom_cb(self, msg):
        """Cache the planar pose; yaw is extracted from the quaternion."""
        self.robot_x = msg.pose.pose.position.x
        self.robot_y = msg.pose.pose.position.y

        q = msg.pose.pose.orientation
        # Standard quaternion -> yaw conversion (Z-axis rotation).
        siny_cosp = 2 * (q.w * q.z + q.x * q.y)
        cosy_cosp = 1 - 2 * (q.y * q.y + q.z * q.z)
        self.robot_yaw = math.atan2(siny_cosp, cosy_cosp)

    def scan_cb(self, msg):
        """Sanitize the scan and downsample it to ``n_obs`` bins.

        inf/nan readings become ``max_range``; each output bin is the MIN of
        its chunk so the closest obstacle in a sector is never averaged away.
        """
        raw = np.array(msg.ranges)
        raw = np.where(np.isinf(raw), self.max_range, raw)
        raw = np.where(np.isnan(raw), self.max_range, raw)

        self.raw_scan_data = np.clip(raw, 0.0, self.max_range)

        chunk = len(self.raw_scan_data) // self.n_obs
        if chunk > 0:
            self.scan_data = np.array(
                [
                    np.min(self.raw_scan_data[i * chunk:(i + 1) * chunk])
                    for i in range(self.n_obs)
                ],
                dtype=np.float32,
            )
        else:
            # Degenerate scan (fewer rays than bins): report "all clear".
            self.scan_data = np.full(self.n_obs, self.max_range, dtype=np.float32)

        self.scan_received = True

    def _target_polar(self):
        """Return ``(distance, bearing)`` to the target in the robot frame.

        Bearing is wrapped to [-pi, pi]. Shared by ``_get_observation`` and
        ``step`` (previously duplicated in both).
        """
        dist = math.hypot(
            self.target_x - self.robot_x,
            self.target_y - self.robot_y,
        )
        angle = math.atan2(
            self.target_y - self.robot_y,
            self.target_x - self.robot_x,
        ) - self.robot_yaw

        while angle > math.pi:
            angle -= 2 * math.pi
        while angle < -math.pi:
            angle += 2 * math.pi

        return dist, angle

    def _get_observation(self, twist):
        """Build the normalized observation vector.

        ``twist`` is the command just published; note the speed component is
        the *commanded* speed, not the measured odometry velocity.
        """
        if self.scan_data is None:
            # No scan yet: pretend everything is at max range.
            lidar_norm = np.ones(self.n_obs, dtype=np.float32)
        else:
            lidar_norm = self.scan_data / self.max_range

        speed_norm = twist.linear.x / self.max_speed
        # Map steering from [-max, max] to [0, 1].
        steering_norm = (self.current_steering / self.max_steering + 1.0) / 2.0

        dist, angle = self._target_polar()
        dist_norm = np.clip(dist / (ARENA_SIZE * 2), 0.0, 1.0)
        angle_norm = (angle + math.pi) / (2 * math.pi)

        obs = np.concatenate(
            [lidar_norm, [speed_norm, steering_norm, dist_norm, angle_norm]]
        )
        return obs.astype(np.float32)

    def step(self, action):
        """Apply one action, wait (<= 0.1 s) for a fresh scan, score the transition.

        Returns ``(obs, reward, terminated, truncated, info)`` per the
        Gymnasium API.
        """
        twist = Twist()

        # Throttle is forward-only: [-1, 1] -> [0, 1] -> [0, max_speed].
        throttle = (float(action[0]) + 1.0) / 2.0
        twist.linear.x = throttle * self.max_speed

        # First-order low-pass filter on steering to avoid jerky commands.
        target_steer = float(action[1]) * self.max_steering
        self.current_steering = (
            0.6 * self.current_steering +
            0.4 * target_steer
        )
        twist.angular.z = self.current_steering

        self.pub_cmd.publish(twist)

        # Spin until a new scan arrives (bounded by 0.1 s) so the observation
        # reflects the world *after* the command was applied.
        self.scan_received = False
        start_time = time.time()
        while not self.scan_received:
            rclpy.spin_once(self.node, timeout_sec=0.001)
            if time.time() - start_time > 0.1:
                break

        obs = self._get_observation(twist)
        dist, angle = self._target_polar()

        if self.scan_data is not None:
            min_lidar = np.min(self.scan_data)
        else:
            min_lidar = 10.0  # no scan: treat as obstacle-free

        self.step_count += 1
        reward = 0.0
        terminated = False
        truncated = False

        # Progress shaping: reward closing distance, amplified when the robot
        # is heading toward the target (cos(bearing) > 0).
        if self.prev_distance_to_target is None:
            self.prev_distance_to_target = dist

        progress = self.prev_distance_to_target - dist
        heading_bonus = max(0.0, math.cos(angle))

        reward += progress * 25.0 * (1.0 + heading_bonus)
        reward -= abs(self.current_steering) * 0.1  # discourage weaving
        reward -= 0.2                               # constant per-step penalty

        self.prev_distance_to_target = dist

        # Goal reached: large bonus plus a reward for finishing early.
        if dist < 0.6:
            time_bonus = max(
                0.0,
                (self.max_steps - self.step_count) / 10.0,
            )
            reward += 200.0 + time_bonus
            terminated = True
            self.goal_reached = True
            print(f"🏆 HEDEF! Bonus: {time_bonus:.1f}")

        # Collision: any downsampled lidar bin closer than 0.25 m.
        if min_lidar < 0.25:
            reward -= 100.0
            terminated = True
            print("💥 ÇARPIŞMA")

        # Stuck detection: commanded forward but making (almost) no progress.
        if abs(progress) < 0.005 and twist.linear.x > 0.2:
            self.stuck_counter += 1
        else:
            self.stuck_counter = 0

        if self.stuck_counter > 80:
            reward -= 50.0
            terminated = True
            print("💤 SIKIŞTI")

        if self.step_count >= self.max_steps:
            truncated = True
            reward -= 30.0
            print("⏱️ ZAMAN AŞIMI")

        return obs, reward, terminated, truncated, {}

    def reset(self, seed=None, options=None):
        """Stop the robot, clear per-episode state, return an initial observation."""
        super().reset(seed=seed)

        self.episode_count += 1
        self.pub_cmd.publish(Twist())  # zero twist: stop the robot

        # Bug fix: the steering low-pass filter state must be cleared, or the
        # previous episode's last steering command leaks into the new episode.
        self.current_steering = 0.0
        self.prev_distance_to_target = None
        self.goal_reached = False
        self.step_count = 0
        self.stuck_counter = 0

        self.scan_data = None
        self.scan_received = False

        # Pump callbacks so pose/scan reflect the new episode state.
        for _ in range(5):
            rclpy.spin_once(self.node, timeout_sec=0.1)

        obs = self._get_observation(Twist())
        return obs, {}

    def close(self):
        """Tear down the ROS node and shut down rclpy."""
        self.node.destroy_node()
        rclpy.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    log_dir = "./logs_bot1/"
    checkpoint_dir = "./checkpoints/"

    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Monitor records per-episode stats; DummyVecEnv provides the vectorized
    # API Stable-Baselines3 expects (a single underlying env here).
    env = RcCarEnv()
    env = Monitor(env, filename=os.path.join(log_dir, "monitor_log"))
    env = DummyVecEnv([lambda: env])

    # Periodic snapshots so an interrupted run is recoverable.
    checkpoint_callback = CheckpointCallback(
        save_freq=50_000,
        save_path=checkpoint_dir,
        name_prefix="bot1_fixed",
    )

    print("🚀 EĞİTİM BAŞLIYOR...")

    model = RecurrentPPO(
        "MlpLstmPolicy",
        env,
        verbose=1,
        tensorboard_log=log_dir,
        learning_rate=1e-4,
        n_steps=2048,
        batch_size=64,
        gamma=0.99,
        ent_coef=0.01,
        policy_kwargs=dict(
            # Bug fix: SB3 >= 1.8 deprecated (and 2.x rejects) the legacy
            # `net_arch=[dict(...)]` list form; it must be a plain dict.
            net_arch=dict(pi=[256, 256], vf=[256, 256]),
            lstm_hidden_size=256,
            n_lstm_layers=1,
        ),
        device="auto",
    )

    try:
        model.learn(
            total_timesteps=2_000_000,
            callback=checkpoint_callback,
            progress_bar=True,
        )
        model.save("bot1_fixed_final")
        print("✅ EĞİTİM TAMAMLANDI")

    except KeyboardInterrupt:
        # Manual stop: keep what was learned so far.
        print("🛑 DURDURULDU")
        model.save("bot1_fixed_interrupted")

    finally:
        # Closes the vec env, which cascades down to RcCarEnv.close().
        env.close()