#G
import gymnasium as gym
import numpy as np
from collections import deque
import cv2 

class CarRacingEnvWrapper(gym.Wrapper):
    """Preprocess, frame-stack and reward-shape an image-based environment.

    Each raw observation is optionally converted to grayscale and resized,
    then the most recent ``num_stack_frames`` processed frames are stacked
    along a new trailing axis. ``step`` additionally subtracts a fixed
    penalty from the reward whenever the frame is classified as "on grass".

    Returned observation shapes:
        * ``grayscale=True``:  ``(H, W, num_stack_frames)``
        * ``grayscale=False``: ``(H, W, C, num_stack_frames)``

    Args:
        env: The environment to wrap. Its observations are indexed as
            ``observation[:, :, 1]`` and passed to OpenCV, so they are
            expected to be ``(H, W, C)`` image arrays.
        num_stack_frames: Number of consecutive frames to stack (>= 1).
        grayscale: If true, convert each frame with ``cv2.COLOR_RGB2GRAY``.
        resize_dim: ``(width, height)`` passed to ``cv2.resize``, or a falsy
            value to keep the original resolution.

    Raises:
        ValueError: If ``num_stack_frames`` is smaller than 1.
    """

    def __init__(self, env, num_stack_frames=4, grayscale=True, resize_dim=(84, 84)):
        # Fail early: an empty frame buffer would otherwise only blow up
        # later inside np.stack() on the first reset().
        if num_stack_frames < 1:
            raise ValueError(
                f"num_stack_frames must be >= 1, got {num_stack_frames}"
            )

        super().__init__(env)
        self.num_stack_frames = num_stack_frames
        self.grayscale = grayscale
        self.resize_dim = resize_dim

        # Rolling buffer: appending beyond maxlen drops the oldest frame.
        self.frames = deque(maxlen=num_stack_frames)

        source_shape = self.env.observation_space.shape

        # Spatial size (height, width) of a processed frame. cv2.resize takes
        # its target as (width, height), hence the swap.
        if resize_dim:
            self.observation_shape = (resize_dim[1], resize_dim[0])
        else:
            self.observation_shape = tuple(source_shape[:2])

        # Grayscale conversion drops the channel axis; otherwise the colour
        # channels survive preprocessing and must be part of the declared
        # space so that it matches what reset()/step() actually return.
        channel_dims = () if grayscale else tuple(source_shape[2:])
        self._frame_shape = tuple(self.observation_shape) + channel_dims

        self.observation_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=self._frame_shape + (num_stack_frames,),
            dtype=np.uint8,
        )

        # Amount subtracted from the reward on every step flagged as grass.
        self.OFF_TRACK_PENALTY_SCALE = 0.1
        # A frame counts as "on grass" when the mean of channel index 1 of
        # the raw observation exceeds this value.
        self.GRASS_COLOR_THRESHOLD = 180

    def _preprocess_frame(self, frame):
        """Apply the configured grayscale conversion and resize to ``frame``."""
        if self.grayscale:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        if self.resize_dim:
            frame = cv2.resize(frame, self.resize_dim, interpolation=cv2.INTER_AREA)
        return frame

    def _stacked_observation(self):
        """Return the buffered frames stacked along a new last axis."""
        return np.stack(self.frames, axis=-1)

    def reset(self, **kwargs):
        """Reset the wrapped environment and refill the frame buffer.

        The first processed frame is repeated ``num_stack_frames`` times so
        the stacked observation has its full shape from the first step.

        Args:
            **kwargs: Forwarded unchanged to the wrapped ``env.reset``.

        Returns:
            A ``(stacked_frames, info)`` tuple.
        """
        observation, info = self.env.reset(**kwargs)
        first_frame = self._preprocess_frame(observation)

        # Drop frames from the previous episode before refilling.
        self.frames.clear()
        self.frames.extend([first_frame] * self.num_stack_frames)

        return self._stacked_observation(), info

    def step(self, action):
        """Step the wrapped environment and apply the off-track penalty.

        Adds three keys to ``info``: ``'is_on_grass'`` (plain ``bool``),
        ``'original_reward'`` and ``'modified_reward'``.

        Args:
            action: Forwarded unchanged to the wrapped ``env.step``.

        Returns:
            ``(stacked_frames, modified_reward, terminated, truncated, info)``.
        """
        observation, reward, terminated, truncated, info = self.env.step(action)

        # Coarse heuristic on the RAW frame: the mean of channel index 1
        # (green, assuming RGB ordering as the grayscale conversion does)
        # over the whole image. Coerced to a built-in bool so the info dict
        # does not carry a NumPy scalar.
        is_on_grass = bool(
            np.mean(observation[:, :, 1]) > self.GRASS_COLOR_THRESHOLD
        )

        modified_reward = reward
        if is_on_grass:
            modified_reward -= self.OFF_TRACK_PENALTY_SCALE

        info['is_on_grass'] = is_on_grass
        info['original_reward'] = reward
        info['modified_reward'] = modified_reward

        self.frames.append(self._preprocess_frame(observation))

        return self._stacked_observation(), modified_reward, terminated, truncated, info
#D