saiteja-coder's picture
Update env.py
f75feca verified
raw
history blame contribute delete
228 Bytes
def step(self, action):
    """Score *action* against the current expected item and return the reward.

    The reward starts at a non-zero base of 0.1 and gains 0.4 for each of
    the ``"label"`` and ``"reply"`` fields of *action* that equals the
    corresponding entry of ``self.current``.

    Args:
        action: Subscriptable object (normally a dict) holding the
            ``"label"`` and ``"reply"`` values to score. A missing field,
            or an action that cannot be subscripted at all (e.g. ``None``),
            is treated as a mismatch instead of raising.

    Returns:
        The reward as a float: 0.1 when nothing matches, 0.5 when exactly
        one field matches, 0.9 when both match.

    Raises:
        KeyError: If ``self.current`` itself lacks ``"label"`` or
            ``"reply"`` while the action supplies that field.
    """
    # NOTE(review): assumes self.current is a mapping with "label" and
    # "reply" keys set elsewhere in the class -- confirm against the caller.
    reward = 0.1  # base reward (avoid 0)
    for field in ("label", "reply"):
        try:
            submitted = action[field]
        except (KeyError, TypeError):
            # Malformed or incomplete action: no bonus for this field, but
            # the caller still receives the non-zero base reward.
            continue
        if submitted == self.current[field]:
            reward += 0.4
    return reward