|
|
import os |
|
|
import time |
|
|
import math |
|
|
import numpy as np |
|
|
import gymnasium as gym |
|
|
from gymnasium import spaces |
|
|
import rclpy |
|
|
from rclpy.node import Node |
|
|
from rclpy.qos import QoSProfile, ReliabilityPolicy |
|
|
from geometry_msgs.msg import Twist |
|
|
from sensor_msgs.msg import LaserScan |
|
|
from stable_baselines3 import PPO |
|
|
from stable_baselines3.common.callbacks import CheckpointCallback |
|
|
from stable_baselines3.common.vec_env import SubprocVecEnv |
|
|
import torch |
|
|
import subprocess |
|
|
|
|
|
|
|
|
class RcCarEnv(gym.Env): |
|
|
def __init__(self, rank=0): |
|
|
super(RcCarEnv, self).__init__() |
|
|
|
|
|
self.rank = rank |
|
|
self.robot_name = f"bot{rank + 1}" |
|
|
|
|
|
if not rclpy.ok(): |
|
|
rclpy.init() |
|
|
|
|
|
self.node = rclpy.create_node(f'rl_agent_{self.robot_name}') |
|
|
|
|
|
qos_best_effort = QoSProfile(reliability=ReliabilityPolicy.BEST_EFFORT, depth=10) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cmd_topic = f'/model/{self.robot_name}/cmd_vel' |
|
|
scan_topic = f'/model/{self.robot_name}/scan' |
|
|
|
|
|
self.pub_cmd = self.node.create_publisher(Twist, cmd_topic, 10) |
|
|
self.sub_scan = self.node.create_subscription(LaserScan, scan_topic, self.scan_cb, qos_best_effort) |
|
|
|
|
|
|
|
|
self.max_speed = 1.0 |
|
|
self.max_steering = 0.5 |
|
|
self.steering_smooth_factor = 0.25 |
|
|
self.current_steering = 0.0 |
|
|
|
|
|
self.scan_data = None |
|
|
self.raw_scan_data = None |
|
|
self.scan_received = False |
|
|
self.n_obs = 16 |
|
|
self.max_range = 10.0 |
|
|
self.step_count = 0 |
|
|
self.max_steps = 2500 |
|
|
self.stuck_counter = 0 |
|
|
self.episode_count = 0 |
|
|
|
|
|
self.action_space = spaces.Box(low=np.array([-1.0, -1.0]), high=np.array([1.0, 1.0]), dtype=np.float32) |
|
|
self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(self.n_obs + 4,), dtype=np.float32) |
|
|
|
|
|
print(f"✅ {self.robot_name} BAĞLANDI! Cmd: {cmd_topic} | Scan: {scan_topic}") |
|
|
|
|
|
def scan_cb(self, msg): |
|
|
raw = np.array(msg.ranges) |
|
|
raw = np.where(np.isinf(raw), self.max_range, raw) |
|
|
raw = np.where(np.isnan(raw), self.max_range, raw) |
|
|
self.raw_scan_data = np.clip(raw, 0.0, self.max_range) |
|
|
chunk = len(self.raw_scan_data) // self.n_obs |
|
|
if chunk > 0: |
|
|
self.scan_data = np.array([np.min(self.raw_scan_data[i*chunk:(i+1)*chunk]) for i in range(self.n_obs)], dtype=np.float32) |
|
|
else: |
|
|
self.scan_data = np.full(self.n_obs, self.max_range, dtype=np.float32) |
|
|
self.scan_received = True |
|
|
|
|
|
def _get_observation(self, twist): |
|
|
if self.scan_data is not None: |
|
|
lidar_norm = self.scan_data / self.max_range |
|
|
min_dist = np.min(lidar_norm) |
|
|
front_sensors = lidar_norm[self.n_obs//2-2 : self.n_obs//2+2] |
|
|
avg_front = np.mean(front_sensors) |
|
|
else: |
|
|
lidar_norm = np.ones(self.n_obs, dtype=np.float32) |
|
|
min_dist = 1.0 |
|
|
avg_front = 1.0 |
|
|
speed_norm = twist.linear.x / self.max_speed |
|
|
steering_norm = (self.current_steering / self.max_steering + 1.0) / 2.0 |
|
|
obs = np.concatenate([lidar_norm, [speed_norm, steering_norm, min_dist, avg_front]]) |
|
|
return obs.astype(np.float32), min_dist, avg_front |
|
|
|
|
|
def step(self, action): |
|
|
twist = Twist() |
|
|
|
|
|
throttle_input = (float(action[0]) + 1.0) / 2.0 |
|
|
twist.linear.x = throttle_input * self.max_speed |
|
|
target_steering = float(action[1]) * self.max_steering |
|
|
self.current_steering = (1.0 - self.steering_smooth_factor) * self.current_steering + \ |
|
|
(self.steering_smooth_factor * target_steering) |
|
|
twist.angular.z = self.current_steering |
|
|
self.pub_cmd.publish(twist) |
|
|
|
|
|
|
|
|
self.scan_received = False |
|
|
start_wait = time.time() |
|
|
while not self.scan_received: |
|
|
rclpy.spin_once(self.node, timeout_sec=0.002) |
|
|
if time.time() - start_wait > 0.1: break |
|
|
|
|
|
observation, min_dist, avg_front = self._get_observation(twist) |
|
|
real_min_dist = min_dist * self.max_range |
|
|
self.step_count += 1 |
|
|
|
|
|
|
|
|
if self.raw_scan_data is not None: |
|
|
mid_idx = len(self.raw_scan_data) // 2 |
|
|
raw_front_sector = self.raw_scan_data[mid_idx-20 : mid_idx+20] |
|
|
true_front_dist = np.min(raw_front_sector) if len(raw_front_sector) > 0 else self.max_range |
|
|
else: |
|
|
true_front_dist = real_min_dist |
|
|
|
|
|
ttc = true_front_dist / (twist.linear.x + 0.1) |
|
|
|
|
|
|
|
|
reward = 0.0 |
|
|
terminated = False |
|
|
truncated = False |
|
|
|
|
|
|
|
|
if twist.linear.x > 0.1: |
|
|
reward += twist.linear.x * (0.4 + 0.6 * avg_front) * 1.5 |
|
|
else: |
|
|
reward -= 0.1 |
|
|
|
|
|
|
|
|
if ttc < 1.2 and twist.linear.x > 0.3: |
|
|
reward -= (1.2 - ttc) * 5.0 |
|
|
|
|
|
|
|
|
if real_min_dist < 0.7: |
|
|
reward -= (0.7 - real_min_dist) ** 2 * 15.0 |
|
|
|
|
|
reward -= abs(self.current_steering) * 0.5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
COLLISION_THRESHOLD = 0.45 |
|
|
|
|
|
if real_min_dist < COLLISION_THRESHOLD or true_front_dist < COLLISION_THRESHOLD: |
|
|
reward = -100.0 |
|
|
terminated = True |
|
|
|
|
|
print(f"💥 {self.robot_name} KAZA! Mesafe: {real_min_dist:.2f}m") |
|
|
self.pub_cmd.publish(Twist()) |
|
|
return observation, reward, terminated, truncated, {} |
|
|
|
|
|
|
|
|
if twist.linear.x < 0.1 and self.step_count > 50: |
|
|
self.stuck_counter += 1 |
|
|
else: |
|
|
self.stuck_counter = 0 |
|
|
|
|
|
if self.stuck_counter > 50: |
|
|
reward = -20.0 |
|
|
terminated = True |
|
|
print(f"💤 {self.robot_name} takıldı.") |
|
|
return observation, reward, terminated, truncated, {} |
|
|
|
|
|
if self.step_count >= self.max_steps: |
|
|
truncated = True |
|
|
reward += 10.0 |
|
|
return observation, reward, terminated, truncated, {} |
|
|
|
|
|
return observation, reward, terminated, truncated, {} |
|
|
|
|
|
def reset(self, seed=None, options=None): |
|
|
super().reset(seed=seed) |
|
|
self.episode_count += 1 |
|
|
|
|
|
|
|
|
stop_cmd = Twist() |
|
|
for _ in range(3): |
|
|
self.pub_cmd.publish(stop_cmd) |
|
|
time.sleep(0.01) |
|
|
|
|
|
|
|
|
if self.episode_count < 20: |
|
|
|
|
|
if self.rank == 0: |
|
|
x, y, yaw = 0.0, 0.0, 0.0 |
|
|
else: |
|
|
x, y, yaw = 0.0, -2.0, 3.14 |
|
|
else: |
|
|
|
|
|
safe_spots = [(0,0), (0,-2), (0,2), (-1.5,0), (1.5,0)] |
|
|
idx = np.random.randint(len(safe_spots)) |
|
|
x, y = safe_spots[idx] |
|
|
x += np.random.uniform(-0.2, 0.2) |
|
|
y += np.random.uniform(-0.2, 0.2) |
|
|
yaw = np.random.uniform(-3.14, 3.14) |
|
|
|
|
|
qw = np.cos(yaw / 2) |
|
|
qz = np.sin(yaw / 2) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cmd = [ |
|
|
'gz', 'service', |
|
|
'-s', '/world/arena/set_pose', |
|
|
'--reqtype', 'gz.msgs.Pose', |
|
|
'--reptype', 'gz.msgs.Boolean', |
|
|
'--timeout', '2000', |
|
|
'--req', f'name: "{self.robot_name}", position: {{x: {x}, y: {y}, z: 0.2}}, orientation: {{w: {qw}, z: {qz}}}' |
|
|
] |
|
|
|
|
|
success = False |
|
|
for _ in range(5): |
|
|
try: |
|
|
|
|
|
result = subprocess.run(cmd, capture_output=True, timeout=1.0) |
|
|
if result.returncode == 0 and "data: true" in str(result.stdout): |
|
|
success = True |
|
|
break |
|
|
except: |
|
|
time.sleep(0.1) |
|
|
|
|
|
if not success: |
|
|
|
|
|
print(f"⚠️ KRİTİK: {self.robot_name} resetlenemedi! İsim hatası olabilir.") |
|
|
|
|
|
|
|
|
|
|
|
time.sleep(0.1) |
|
|
self.scan_data = None |
|
|
self.scan_received = False |
|
|
|
|
|
|
|
|
start_wait = time.time() |
|
|
while not self.scan_received and (time.time() - start_wait) < 0.5: |
|
|
rclpy.spin_once(self.node, timeout_sec=0.01) |
|
|
|
|
|
dummy_twist = Twist() |
|
|
obs, _, _ = self._get_observation(dummy_twist) |
|
|
|
|
|
self.step_count = 0 |
|
|
self.stuck_counter = 0 |
|
|
return obs, {} |
|
|
|
|
|
|
|
|
def make_env(rank): |
|
|
def _init(): |
|
|
env = RcCarEnv(rank=rank) |
|
|
return env |
|
|
return _init |
|
|
|
|
|
if __name__ == '__main__': |
|
|
log_dir = "./logs_multi_bot/" |
|
|
os.makedirs(log_dir, exist_ok=True) |
|
|
|
|
|
|
|
|
num_cpu = 2 |
|
|
|
|
|
print(f"🚀 {num_cpu} ROBOT İLE PARALEL EĞİTİM BAŞLIYOR...") |
|
|
|
|
|
|
|
|
|
|
|
env = SubprocVecEnv([make_env(i) for i in range(num_cpu)]) |
|
|
|
|
|
model = PPO( |
|
|
"MlpPolicy", |
|
|
env, |
|
|
verbose=1, |
|
|
tensorboard_log=log_dir, |
|
|
use_sde=True, |
|
|
learning_rate=3e-4, |
|
|
n_steps=2048 // num_cpu, |
|
|
batch_size=64, |
|
|
gamma=0.99, |
|
|
ent_coef=0.03 |
|
|
) |
|
|
|
|
|
|
|
|
model.learn(total_timesteps=1000000, progress_bar=True) |
|
|
model.save("rc_car_multi_final") |
|
|
print("✅ ÇOKLU EĞİTİM TAMAMLANDI!") |