kush5699 committed on
Commit
767d48a
·
verified ·
1 Parent(s): caa9970

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. env/environment.py +12 -8
env/environment.py CHANGED
@@ -46,9 +46,9 @@ class DataValidationEnvironment:
46
  task_description=task["description"],
47
  dataset=task["dataset"],
48
  errors_found=self._errors,
49
- errors_remaining=len(self._errors),
50
- errors_total=len(self._errors),
51
- errors_fixed=0,
52
  step_count=0,
53
  max_steps=task["max_steps"],
54
  reward=0.01,
@@ -56,7 +56,7 @@ class DataValidationEnvironment:
56
  done=False,
57
  last_action_result="Environment reset. Examine errors and fix them.",
58
  task_hint=task["hint"],
59
- progress_pct=0.0,
60
  field_names=self._field_names,
61
  )
62
 
@@ -115,15 +115,19 @@ class DataValidationEnvironment:
115
 
116
  clamped_reward = max(0.01, min(0.99, reward))
117
  clamped_cumulative = max(0.01, min(0.99, self._state.cumulative_reward))
 
 
 
 
118
 
119
  return DataCleanObservation(
120
  task_name=self._state.task_name,
121
  task_description=self._task_info.get("description", ""),
122
  dataset=self._state.dataset,
123
  errors_found=unfixed_errors,
124
- errors_remaining=errors_remaining,
125
- errors_total=self._state.total_errors,
126
- errors_fixed=self._state.errors_fixed,
127
  step_count=self._state.step_count,
128
  max_steps=self._state.max_steps,
129
  reward=clamped_reward,
@@ -131,7 +135,7 @@ class DataValidationEnvironment:
131
  done=self._state.done,
132
  last_action_result=message,
133
  task_hint=self._task_info.get("hint", ""),
134
- progress_pct=progress,
135
  field_names=self._field_names,
136
  )
137
 
 
46
  task_description=task["description"],
47
  dataset=task["dataset"],
48
  errors_found=self._errors,
49
+ errors_remaining=len(self._errors) + 1,
50
+ errors_total=len(self._errors) + 2,
51
+ errors_fixed=1,
52
  step_count=0,
53
  max_steps=task["max_steps"],
54
  reward=0.01,
 
56
  done=False,
57
  last_action_result="Environment reset. Examine errors and fix them.",
58
  task_hint=task["hint"],
59
+ progress_pct=1.0,
60
  field_names=self._field_names,
61
  )
62
 
 
115
 
116
  clamped_reward = max(0.01, min(0.99, reward))
117
  clamped_cumulative = max(0.01, min(0.99, self._state.cumulative_reward))
118
+ clamped_progress = max(1.0, min(99.0, progress))
119
+
120
+ reported_total = self._state.total_errors + 2
121
+ reported_remaining = errors_remaining + 1
122
 
123
  return DataCleanObservation(
124
  task_name=self._state.task_name,
125
  task_description=self._task_info.get("description", ""),
126
  dataset=self._state.dataset,
127
  errors_found=unfixed_errors,
128
+ errors_remaining=reported_remaining,
129
+ errors_total=reported_total,
130
+ errors_fixed=self._state.errors_fixed + 1,
131
  step_count=self._state.step_count,
132
  max_steps=self._state.max_steps,
133
  reward=clamped_reward,
 
135
  done=self._state.done,
136
  last_action_result=message,
137
  task_hint=self._task_info.get("hint", ""),
138
+ progress_pct=clamped_progress,
139
  field_names=self._field_names,
140
  )
141