f75feca c32d4af f75feca 7e57563 f75feca dab6e31 f75feca
1
2
3
4
5
6
7
8
9
10
def step(self, action) -> float:
    """Score ``action`` against the current expected example.

    The reward starts at a base of 0.1 and gains 0.4 for each of the
    ``"label"`` and ``"reply"`` entries of ``action`` that equals the
    corresponding entry of ``self.current``.

    Args:
        action: Subscriptable object providing ``"label"`` and ``"reply"``.

    Returns:
        The accumulated reward: the 0.1 base plus 0.4 per matching field.

    Raises:
        KeyError: If ``action`` or ``self.current`` is a dict that lacks
            ``"label"`` or ``"reply"``.
    """
    # NOTE(review): the original had this whole body on a single physical
    # line, so everything after the first `#` was a comment and the function
    # returned None. The two checks are restored as independent siblings --
    # confirm against the upstream file that they were not nested.
    expected = self.current

    reward = 0.1  # base reward (avoid 0)
    if action["label"] == expected["label"]:
        reward += 0.4
    if action["reply"] == expected["reply"]:
        reward += 0.4
    return reward