import os import time import math import numpy as np import gymnasium as gym from gymnasium import spaces import rclpy from rclpy.node import Node from rclpy.qos import QoSProfile, ReliabilityPolicy from geometry_msgs.msg import Twist from sensor_msgs.msg import LaserScan from stable_baselines3 import PPO from stable_baselines3.common.callbacks import CheckpointCallback from stable_baselines3.common.vec_env import SubprocVecEnv # <--- SİHİR BURADA import torch import subprocess # --- ENV SINIFI (NAMESPACE DESTEKLİ) --- class RcCarEnv(gym.Env): def __init__(self, rank=0): super(RcCarEnv, self).__init__() self.rank = rank self.robot_name = f"bot{rank + 1}" if not rclpy.ok(): rclpy.init() self.node = rclpy.create_node(f'rl_agent_{self.robot_name}') qos_best_effort = QoSProfile(reliability=ReliabilityPolicy.BEST_EFFORT, depth=10) # --- DÜZELTME BURADA --- # Launch dosyasındaki Bridge'de: /model/bot1/cmd_vel ve /model/bot1/scan demiştik. # Python kodu da TAM OLARAK bunları kullanmalı. cmd_topic = f'/model/{self.robot_name}/cmd_vel' scan_topic = f'/model/{self.robot_name}/scan' # <-- ESKİSİ: f'/{self.robot_name}/scan' İDİ. HATALIYDI. self.pub_cmd = self.node.create_publisher(Twist, cmd_topic, 10) self.sub_scan = self.node.create_subscription(LaserScan, scan_topic, self.scan_cb, qos_best_effort) # ... (Geri kalan parametreler aynı) ... self.max_speed = 1.0 self.max_steering = 0.5 self.steering_smooth_factor = 0.25 self.current_steering = 0.0 self.scan_data = None self.raw_scan_data = None self.scan_received = False self.n_obs = 16 self.max_range = 10.0 self.step_count = 0 self.max_steps = 2500 self.stuck_counter = 0 self.episode_count = 0 self.action_space = spaces.Box(low=np.array([-1.0, -1.0]), high=np.array([1.0, 1.0]), dtype=np.float32) self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(self.n_obs + 4,), dtype=np.float32) print(f"✅ {self.robot_name} BAĞLANDI! Cmd: {cmd_topic} | Scan: {scan_topic}") def scan_cb(self, msg): raw = np.array(msg.ranges) raw = np.where(np.isinf(raw), self.max_range, raw) raw = np.where(np.isnan(raw), self.max_range, raw) self.raw_scan_data = np.clip(raw, 0.0, self.max_range) chunk = len(self.raw_scan_data) // self.n_obs if chunk > 0: self.scan_data = np.array([np.min(self.raw_scan_data[i*chunk:(i+1)*chunk]) for i in range(self.n_obs)], dtype=np.float32) else: self.scan_data = np.full(self.n_obs, self.max_range, dtype=np.float32) self.scan_received = True def _get_observation(self, twist): if self.scan_data is not None: lidar_norm = self.scan_data / self.max_range min_dist = np.min(lidar_norm) front_sensors = lidar_norm[self.n_obs//2-2 : self.n_obs//2+2] avg_front = np.mean(front_sensors) else: lidar_norm = np.ones(self.n_obs, dtype=np.float32) min_dist = 1.0 avg_front = 1.0 speed_norm = twist.linear.x / self.max_speed steering_norm = (self.current_steering / self.max_steering + 1.0) / 2.0 obs = np.concatenate([lidar_norm, [speed_norm, steering_norm, min_dist, avg_front]]) return obs.astype(np.float32), min_dist, avg_front def step(self, action): twist = Twist() # Gaz ve Direksiyon throttle_input = (float(action[0]) + 1.0) / 2.0 twist.linear.x = throttle_input * self.max_speed target_steering = float(action[1]) * self.max_steering self.current_steering = (1.0 - self.steering_smooth_factor) * self.current_steering + \ (self.steering_smooth_factor * target_steering) twist.angular.z = self.current_steering self.pub_cmd.publish(twist) # Lidar Bekleme (Timeout korumalı) self.scan_received = False start_wait = time.time() while not self.scan_received: rclpy.spin_once(self.node, timeout_sec=0.002) if time.time() - start_wait > 0.1: break observation, min_dist, avg_front = self._get_observation(twist) real_min_dist = min_dist * self.max_range self.step_count += 1 # Ham Veri Kontrolü if self.raw_scan_data is not None: mid_idx = len(self.raw_scan_data) // 2 raw_front_sector = self.raw_scan_data[mid_idx-20 : mid_idx+20] true_front_dist = np.min(raw_front_sector) if len(raw_front_sector) > 0 else self.max_range else: true_front_dist = real_min_dist ttc = true_front_dist / (twist.linear.x + 0.1) # --- ÖDÜL SİSTEMİ --- reward = 0.0 terminated = False truncated = False # İlerleme if twist.linear.x > 0.1: reward += twist.linear.x * (0.4 + 0.6 * avg_front) * 1.5 else: reward -= 0.1 # Çarpışma Riski Cezası if ttc < 1.2 and twist.linear.x > 0.3: reward -= (1.2 - ttc) * 5.0 # Duvara Yaklaşma if real_min_dist < 0.7: reward -= (0.7 - real_min_dist) ** 2 * 15.0 reward -= abs(self.current_steering) * 0.5 # --- KRİTİK DÜZELTME BURADA --- # Eski: 0.25 (Çok yakındı, fiziksel çarpışma önce oluyordu) # Yeni: 0.45 (Artık robot duvara burnunu değdirince reset atacak) COLLISION_THRESHOLD = 0.45 if real_min_dist < COLLISION_THRESHOLD or true_front_dist < COLLISION_THRESHOLD: reward = -100.0 # Büyük ceza terminated = True # Hata ayıklama için yazdır (Hangi robot, kaç metrede çarptı?) print(f"💥 {self.robot_name} KAZA! Mesafe: {real_min_dist:.2f}m") self.pub_cmd.publish(Twist()) # Durdur return observation, reward, terminated, truncated, {} # Stuck Kontrolü if twist.linear.x < 0.1 and self.step_count > 50: self.stuck_counter += 1 else: self.stuck_counter = 0 if self.stuck_counter > 50: reward = -20.0 terminated = True print(f"💤 {self.robot_name} takıldı.") return observation, reward, terminated, truncated, {} if self.step_count >= self.max_steps: truncated = True reward += 10.0 return observation, reward, terminated, truncated, {} return observation, reward, terminated, truncated, {} def reset(self, seed=None, options=None): super().reset(seed=seed) self.episode_count += 1 # Durdur stop_cmd = Twist() for _ in range(3): self.pub_cmd.publish(stop_cmd) time.sleep(0.01) # Güvenli Koordinatlar if self.episode_count < 20: # Başlangıçta sabit, güvenli, ayrı yerler if self.rank == 0: x, y, yaw = 0.0, 0.0, 0.0 # Bot 1 Merkez else: x, y, yaw = 0.0, -2.0, 3.14 # Bot 2 Aşağısı else: # Rastgele ama güvenli safe_spots = [(0,0), (0,-2), (0,2), (-1.5,0), (1.5,0)] idx = np.random.randint(len(safe_spots)) x, y = safe_spots[idx] x += np.random.uniform(-0.2, 0.2) y += np.random.uniform(-0.2, 0.2) yaw = np.random.uniform(-3.14, 3.14) qw = np.cos(yaw / 2) qz = np.sin(yaw / 2) # --- RESET SERVİSİ --- # self.robot_name'in "bot1" veya "bot2" olduğundan eminiz. # Ancak Gazebo model listesinde adı farklıysa reset çalışmaz. # Launch dosyasında "-name bot1" dedik, yani doğru olmalı. cmd = [ 'gz', 'service', '-s', '/world/arena/set_pose', '--reqtype', 'gz.msgs.Pose', '--reptype', 'gz.msgs.Boolean', '--timeout', '2000', '--req', f'name: "{self.robot_name}", position: {{x: {x}, y: {y}, z: 0.2}}, orientation: {{w: {qw}, z: {qz}}}' ] success = False for _ in range(5): try: # stderr=subprocess.PIPE ekleyerek hatayı görebiliriz result = subprocess.run(cmd, capture_output=True, timeout=1.0) if result.returncode == 0 and "data: true" in str(result.stdout): success = True break except: time.sleep(0.1) if not success: # Eğer burası yazıyorsa, Gazebo robot ismini bulamıyor demektir. print(f"⚠️ KRİTİK: {self.robot_name} resetlenemedi! İsim hatası olabilir.") # Acil durum: Model listesini bas (Sadece debug için) # os.system("gz service -s /world/arena/scene/info ...") time.sleep(0.1) self.scan_data = None self.scan_received = False # Hızlı Lidar Check start_wait = time.time() while not self.scan_received and (time.time() - start_wait) < 0.5: rclpy.spin_once(self.node, timeout_sec=0.01) dummy_twist = Twist() obs, _, _ = self._get_observation(dummy_twist) self.step_count = 0 self.stuck_counter = 0 return obs, {} # --- YARDIMCI FONKSİYON (PARALEL ÇALIŞTIRMA İÇİN) --- def make_env(rank): def _init(): env = RcCarEnv(rank=rank) return env return _init if __name__ == '__main__': log_dir = "./logs_multi_bot/" os.makedirs(log_dir, exist_ok=True) # 2 ROBOTLU PARALEL EĞİTİM num_cpu = 2 print(f"🚀 {num_cpu} ROBOT İLE PARALEL EĞİTİM BAŞLIYOR...") # Çoklu Ortam Yaratıyoruz # SubprocVecEnv: Her ortamı ayrı bir işlemci çekirdeğinde çalıştırır env = SubprocVecEnv([make_env(i) for i in range(num_cpu)]) model = PPO( "MlpPolicy", env, verbose=1, tensorboard_log=log_dir, use_sde=True, learning_rate=3e-4, n_steps=2048 // num_cpu, # Adım sayısını robot sayısına bölüyoruz batch_size=64, gamma=0.99, ent_coef=0.03 ) # Daha hızlı öğreneceği için toplam adımı artırabilirsin model.learn(total_timesteps=1000000, progress_bar=True) model.save("rc_car_multi_final") print("✅ ÇOKLU EĞİTİM TAMAMLANDI!")