Reduce inspection bonus to close reward farming loophole
Browse files
cleanops_env/environment.py
CHANGED
|
@@ -140,7 +140,7 @@ class CleanOpsEnvironment(Environment[DataCleaningAction, DataCleaningObservatio
|
|
| 140 |
self._focus_table_name = table_name
|
| 141 |
if table_name not in self._state.inspected_tables:
|
| 142 |
self._state.inspected_tables.append(table_name)
|
| 143 |
-
insight_bonus = 0.
|
| 144 |
status_message = f"Inspected table '{table_name}'."
|
| 145 |
else:
|
| 146 |
noop_penalty = -0.02
|
|
@@ -155,7 +155,7 @@ class CleanOpsEnvironment(Environment[DataCleaningAction, DataCleaningObservatio
|
|
| 155 |
self._focus_operation_detail = self._build_operation_detail(self._task_spec, operation_id, self._state.tables, None)
|
| 156 |
if operation_id not in self._state.inspected_operations:
|
| 157 |
self._state.inspected_operations.append(operation_id)
|
| 158 |
-
insight_bonus = 0.
|
| 159 |
status_message = f"Inspected operation '{operation_id}'."
|
| 160 |
else:
|
| 161 |
noop_penalty = -0.02
|
|
@@ -342,4 +342,3 @@ class CleanOpsEnvironment(Environment[DataCleaningAction, DataCleaningObservatio
|
|
| 342 |
why_it_matters=operation.why_it_matters,
|
| 343 |
change_preview=preview,
|
| 344 |
)
|
| 345 |
-
|
|
|
|
| 140 |
self._focus_table_name = table_name
|
| 141 |
if table_name not in self._state.inspected_tables:
|
| 142 |
self._state.inspected_tables.append(table_name)
|
| 143 |
+
insight_bonus = 0.01
|
| 144 |
status_message = f"Inspected table '{table_name}'."
|
| 145 |
else:
|
| 146 |
noop_penalty = -0.02
|
|
|
|
| 155 |
self._focus_operation_detail = self._build_operation_detail(self._task_spec, operation_id, self._state.tables, None)
|
| 156 |
if operation_id not in self._state.inspected_operations:
|
| 157 |
self._state.inspected_operations.append(operation_id)
|
| 158 |
+
insight_bonus = 0.01
|
| 159 |
status_message = f"Inspected operation '{operation_id}'."
|
| 160 |
else:
|
| 161 |
noop_penalty = -0.02
|
|
|
|
| 342 |
why_it_matters=operation.why_it_matters,
|
| 343 |
change_preview=preview,
|
| 344 |
)
|
|
|