Spaces:

kumar6591
/

data-quality-env

Sleeping

App Files Community

Hemanth Kunta committed on Apr 8

Commit

aa25459

1 Parent(s): ae0d0fa

Fix invalid rewards and Space query guards

Browse files

Files changed (3) hide show

env/app.py +1 -1
openenv.yaml +2 -2
space_app.py +39 -0

env/app.py CHANGED Viewed

@@ -101,7 +101,7 @@ def step(payload: dict):
             result = engine.execute(action.sql)
             if isinstance(result, str) and result.startswith("ERROR"):
                 obs = _make_observation(task, state, engine, table_names, None, result, None)
-                reward = Reward(value=-0.1, breakdown=_zero_breakdown(destructive=-0.1), done=False, info={"error": result})
             else:
                 state.query_credits -= 1
                 obs = _make_observation(task, state, engine, table_names, result if isinstance(result, list) else None, None, None)

             result = engine.execute(action.sql)
             if isinstance(result, str) and result.startswith("ERROR"):
                 obs = _make_observation(task, state, engine, table_names, None, result, None)
+                reward = Reward(value=0.0, breakdown=_zero_breakdown(), done=False, info={"error": result})
             else:
                 state.query_credits -= 1
                 obs = _make_observation(task, state, engine, table_names, result if isinstance(result, list) else None, None, None)

openenv.yaml CHANGED Viewed

@@ -70,7 +70,7 @@ observation_space:
     last_action_error: "string | null"
     last_fix_score: "float | null"
-reward_range: [-0.1, 1.25]
 reward_design:
   audit_score: "0.0–1.0, Brier-adjusted per finding confidence"
@@ -78,7 +78,7 @@ reward_design:
   valid_query_finds_issue: "+0.1 for valid SQL that surfaces NULLs, duplicates, or other clear audit evidence"
   budget_bonus: "up to +0.10 for early report submission"
   fix_bonus: "up to +0.25 for correct fix_sql repairs"
-  destructive_sql_penalty: -0.1
 api:
   reset: "POST /reset  {task_id: int, seed: int}"

     last_action_error: "string | null"
     last_fix_score: "float | null"
+reward_range: [0.0, 1.25]
 reward_design:
   audit_score: "0.0–1.0, Brier-adjusted per finding confidence"
   valid_query_finds_issue: "+0.1 for valid SQL that surfaces NULLs, duplicates, or other clear audit evidence"
   budget_bonus: "up to +0.10 for early report submission"
   fix_bonus: "up to +0.25 for correct fix_sql repairs"
+  invalid_sql_penalty: 0.0
 api:
   reset: "POST /reset  {task_id: int, seed: int}"

space_app.py CHANGED Viewed

@@ -72,6 +72,36 @@ def heuristic_queries(task_id: int) -> list[str]:
     ]
 def normalize_command(text: str) -> str:
     return (text or "").strip()
@@ -143,6 +173,11 @@ def run_query(sql_text: str, current_obs: dict[str, Any] | None, chat: list[dict
         chat = chat + [{"role": "assistant", "content": "Send a SQL query first."}]
         return chat, format_observation(current_obs), session_status(current_obs), format_reward({}), current_obs
     out = SESSION.step({"action": {"action_type": "query", "sql": sql}})
     obs = out.get("observation")
     reward = out.get("reward")
@@ -180,6 +215,10 @@ def auto_audit(current_obs: dict[str, Any] | None, chat: list[dict[str, str]]):
     obs = current_obs
     reward = None
     for sql in queries:
         out = SESSION.step({"action": {"action_type": "query", "sql": sql}})
         obs = out.get("observation")
         reward = out.get("reward")

     ]
+def current_tables(obs: dict[str, Any] | None) -> set[str]:
+    tables = (obs or {}).get("tables") or {}
+    return {str(name).lower() for name in tables.keys()}
+def referenced_tables(sql_text: str) -> set[str]:
+    sql = normalize_command(sql_text)
+    matches = re.finditer(r"\b(?:from|join)\s+([a-zA-Z_][\w\.]*)", sql, flags=re.IGNORECASE)
+    refs: set[str] = set()
+    for match in matches:
+        identifier = match.group(1).split(".")[-1].lower()
+        if identifier:
+            refs.add(identifier)
+    return refs
+def validate_query_tables(sql_text: str, obs: dict[str, Any] | None) -> str | None:
+    allowed = current_tables(obs)
+    if not allowed:
+        return None
+    refs = referenced_tables(sql_text)
+    if not refs:
+        return None
+    unknown = sorted(refs - allowed)
+    if unknown:
+        available = ", ".join(sorted(allowed))
+        return f"This task only exposes: {available}. Please query one of those tables instead of: {', '.join(unknown)}."
+    return None
 def normalize_command(text: str) -> str:
     return (text or "").strip()
         chat = chat + [{"role": "assistant", "content": "Send a SQL query first."}]
         return chat, format_observation(current_obs), session_status(current_obs), format_reward({}), current_obs
+    table_error = validate_query_tables(sql, current_obs)
+    if table_error:
+        chat = chat + [{"role": "assistant", "content": table_error}]
+        return chat, format_observation(current_obs), session_status(current_obs), format_reward({"value": 0.0, "done": False}), current_obs
     out = SESSION.step({"action": {"action_type": "query", "sql": sql}})
     obs = out.get("observation")
     reward = out.get("reward")
     obs = current_obs
     reward = None
     for sql in queries:
+        table_error = validate_query_tables(sql, obs)
+        if table_error:
+            running_chat.append({"role": "assistant", "content": table_error})
+            continue
         out = SESSION.step({"action": {"action_type": "query", "sql": sql}})
         obs = out.get("observation")
         reward = out.get("reward")