Spaces:

Eishaan
/

sql-migration-env

Sleeping

App Files Files Community

Eishaan committed on Apr 12

Commit

05c4751

1 Parent(s): 62d9b39

fixed errors v2

Browse files

Files changed (6) hide show

.gitignore +5 -0
Dockerfile +3 -2
inference.py +61 -25
models.py +11 -1
server/app.py +6 -0
server/environment.py +56 -14

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+__pycache__/
+*.pyc
+uv.lock
+.env
+.venv/

Dockerfile CHANGED Viewed

@@ -9,9 +9,10 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy all project files
 COPY . .
-# Set Python path for imports
 ENV PYTHONPATH="/app:$PYTHONPATH"
 # Health check
 HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
     CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health')" || exit 1

 # Copy all project files
 COPY . .
+# Set environment variables for Docker and Hugging Face
 ENV PYTHONPATH="/app:$PYTHONPATH"
+ENV PYTHONUNBUFFERED=1
+ENV PORT=7860
 # Health check
 HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
     CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health')" || exit 1

inference.py CHANGED Viewed

@@ -50,17 +50,18 @@ CRITICAL SQLite-specific rules (violations cause immediate errors):
 2. To change column types, add NOT NULL, or add FKs: CREATE new table, INSERT INTO new SELECT FROM old, DROP old, RENAME new.
 3. Apostrophes in data (O'Brien, O'Neill) are present — escape with '' in string literals.
 4. Execute exactly ONE SQL statement per step.
-5. For table normalization: create new tables first, INSERT INTO ... SELECT, then drop old tables.
-6. For orphaned FK rows: check the TARGET SCHEMA for the correct anomaly/issues table name (it varies per task). Log invalid records there before dropping.
-7. For text currency columns like '$90,000' or '$1,234.56': strip '$' and ',' then cast to the type in the target schema (INTEGER for whole numbers, REAL for decimals).
-8. IMPORTANT: Before writing any DDL, execute SELECT * FROM tablename LIMIT 5 for each source table to inspect the actual data format and identify edge cases like empty strings, leading whitespace, NULL values, and special characters.
-9. Do NOT set submit_final to true until you have run SELECT COUNT(*) on your target tables and verified the counts and data match what the task requires.
-10. When migration is complete and verified, set submit_final to true.
 TARGET SCHEMA (achieve this exactly):
 {target_ddl}
-Respond ONLY with valid JSON — no markdown, no code blocks, no text outside the object:
 {{"sql_command": "your SQL here", "reasoning": "why", "submit_final": false}}"""
 ALL_TASKS = [
@@ -74,6 +75,26 @@ ALL_TASKS = [
 ]
 MAX_PARSE_ERRORS = 5  # Consecutive parse errors before giving up
 AUTO_SUBMIT_THRESHOLD = 0.95
 def call_llm(messages: list, timeout: int = 90) -> str:
@@ -186,13 +207,16 @@ def run_task_local(task_name: str) -> dict:
     history = [{"role": "system", "content": task_system_prompt}]
     # Initial observation
-    initial_msg = (
-        f"CURRENT DATABASE SCHEMA:\n{obs.current_schema_sql}\n\n"
-        f"Status: {obs.last_execution_result}\n"
-        f"Migration progress: {obs.migration_progress:.2f}\n\n"
-        f"Start by inspecting the source data with SELECT queries, then begin the migration."
-    )
-    history.append({"role": "user", "content": initial_msg})
     rewards_list = []
     consecutive_parse_errors = 0  # D6: Track consecutive only
@@ -204,8 +228,8 @@ def run_task_local(task_name: str) -> dict:
         if done:
             break
-        # --- D5: Context window fix — only keep last 10 messages + system ---
-        messages = [history[0]] + history[-10:]
         try:
             raw_response = call_llm(messages)
@@ -226,11 +250,21 @@ def run_task_local(task_name: str) -> dict:
                 print(f"[STEP] step={step+1} action=MAX_PARSE_ERRORS reward=0.00 done=true error=too_many_consecutive_parse_errors", flush=True)
                 done = True
                 break
-            history.append({"role": "assistant", "content": raw_response})
-            history.append({
                 "role": "user",
-                "content": 'ERROR: Your response was not valid JSON. Respond ONLY with: {"sql_command": "...", "reasoning": "...", "submit_final": false}',
-            })
             continue
         # Build the MigrationAction
@@ -278,24 +312,26 @@ def run_task_local(task_name: str) -> dict:
         )
         # Add to conversation history
         history.append({"role": "assistant", "content": json.dumps(action_dict)})
         # --- D5: Lean feedback — NO schema repetition ---
-        feedback_msg = (
             f"EXECUTION RESULT: {obs.last_execution_result}\n"
             f"Progress: {obs.migration_progress:.2f}"
         )
         if done:
-            feedback_msg += "\n\nEpisode complete."
         elif obs.migration_progress >= 0.9:
-            feedback_msg += (
                 "\n\nMigration is nearly complete! Run SELECT COUNT(*) on each table "
                 "and compare to your expectations. If everything matches, set submit_final to true."
             )
         else:
-            feedback_msg += "\n\nContinue the migration. Write your next SQL command."
-        history.append({"role": "user", "content": feedback_msg})
     # Print END
     rewards_str = ",".join(f"{r:.2f}" for r in rewards_list) if rewards_list else "0.00"

 2. To change column types, add NOT NULL, or add FKs: CREATE new table, INSERT INTO new SELECT FROM old, DROP old, RENAME new.
 3. Apostrophes in data (O'Brien, O'Neill) are present — escape with '' in string literals.
 4. Execute exactly ONE SQL statement per step.
+5. If a table already exists, you MUST drop it before recreating it (e.g., DROP TABLE IF EXISTS users_new).
+6. SQLite strictly expects `INSERT INTO tbl VALUES (...)`, not `VALUE (...)`. Ensure column counts match exactly.
+7. For table normalization: create new tables first, INSERT INTO ... SELECT, then drop old tables.
+8. For orphaned FK rows: check the TARGET SCHEMA for the anomaly/issues table name. Log invalid records there before dropping.
+9. For text currency (e.g. '$90,000'): strip '$' and ',' then cast to the target type (INTEGER/REAL).
+10. IMPORTANT: Before writing any DDL, execute SELECT * FROM tablename LIMIT 5 to inspect the data format.
+11. Do NOT set submit_final to true until you run SELECT COUNT(*) and verify data matches the task.
 TARGET SCHEMA (achieve this exactly):
 {target_ddl}
+Respond ONLY with a valid JSON object. Do not use markdown backticks (```json). No conversational text.
 {{"sql_command": "your SQL here", "reasoning": "why", "submit_final": false}}"""
 ALL_TASKS = [
 ]
 MAX_PARSE_ERRORS = 5  # Consecutive parse errors before giving up
 AUTO_SUBMIT_THRESHOLD = 0.95
+MAX_HISTORY_PAIRS = 4  # Keep maximum of 4 user/assistant turn pairs
+def build_messages(system_prompt: str, history: list, current_obs_msg: dict) -> list:
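+    """
+    Build the message list for one LLM call, pruning history to avoid context bloat.
+    Returns the system prompt, followed by at most the last MAX_HISTORY_PAIRS * 2
+    non-system messages from `history`, followed by `current_obs_msg`.
+    """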
+    system_msg = [{"role": "system", "content": system_prompt}]
+    # We only want assistant/user pairs. Filter out system msgs if any exist in history
+    filtered_history = [m for m in history if m["role"] != "system"]
+    # Keep only the last MAX_HISTORY_PAIRS * 2 messages
+    max_msgs = MAX_HISTORY_PAIRS * 2
+    if len(filtered_history) > max_msgs:
+        pruned_history = filtered_history[-max_msgs:]
+    else:
+        pruned_history = filtered_history
+    return system_msg + pruned_history + [current_obs_msg]
 def call_llm(messages: list, timeout: int = 90) -> str:
     history = [{"role": "system", "content": task_system_prompt}]
     # Initial observation
+    initial_msg = {
+        "role": "user",
+        "content": (
+            f"CURRENT DATABASE SCHEMA:\n{obs.current_schema_sql}\n\n"
+            f"Status: {obs.last_execution_result}\n"
+            f"Migration progress: {obs.migration_progress:.2f}\n\n"
+            f"Start by inspecting the source data with SELECT queries, then begin the migration."
+        )
+    }
+    history = []
     rewards_list = []
     consecutive_parse_errors = 0  # D6: Track consecutive only
         if done:
             break
+        # --- D5: Context window fix: Aggressively prune history via build_messages ---
+        messages = build_messages(task_system_prompt, history, initial_msg)
         try:
             raw_response = call_llm(messages)
                 print(f"[STEP] step={step+1} action=MAX_PARSE_ERRORS reward=0.00 done=true error=too_many_consecutive_parse_errors", flush=True)
                 done = True
                 break
+            # CRITICAL: Strip <think> blocks (closed or unterminated) before appending to history, so reasoning text does not bloat the context and trigger HTTP 413 errors
+            stripped_response = re.sub(r"<think>.*?</think>", "", raw_response, flags=re.DOTALL).strip()
+            stripped_response = re.sub(r"<think>.*$", "", stripped_response, flags=re.DOTALL).strip()
+            # If it's still huge, truncate it to 500 chars to save context
+            if len(stripped_response) > 500:
+                stripped_response = stripped_response[:500] + "... [TRUNCATED DUE TO PARSE ERROR]"
+            history.append(initial_msg)  # The prompt we sent
+            history.append({"role": "assistant", "content": stripped_response}) # The stripped response
+            initial_msg = {
                 "role": "user",
+                "content": 'ERROR: Your response was not a valid JSON object. Do not use markdown blocks. Respond strictly with: {"sql_command": "...", "reasoning": "...", "submit_final": false}'
+            }
             continue
         # Build the MigrationAction
         )
         # Add to conversation history
+        history.append(initial_msg)
         history.append({"role": "assistant", "content": json.dumps(action_dict)})
         # --- D5: Lean feedback — NO schema repetition ---
+        feedback_text = (
             f"EXECUTION RESULT: {obs.last_execution_result}\n"
             f"Progress: {obs.migration_progress:.2f}"
+            f"\nSchema Diff (Missing/Extra constraints vs Target):\n{obs.schema_diff}"
         )
         if done:
+            feedback_text += "\n\nEpisode complete."
         elif obs.migration_progress >= 0.9:
+            feedback_text += (
                 "\n\nMigration is nearly complete! Run SELECT COUNT(*) on each table "
                 "and compare to your expectations. If everything matches, set submit_final to true."
             )
         else:
+            feedback_text += "\n\nContinue the migration. Write your next SQL command."
+        initial_msg = {"role": "user", "content": feedback_text}
     # Print END
     rewards_str = ",".join(f"{r:.2f}" for r in rewards_list) if rewards_list else "0.00"

models.py CHANGED Viewed

@@ -10,7 +10,7 @@ from __future__ import annotations
 from typing import Any, Dict, Optional
 from openenv.core.env_server.types import Action, Observation, State
-from pydantic import Field
 class MigrationAction(Action):
@@ -40,6 +40,11 @@ class MigrationAction(Action):
         description="Set to true when you believe the migration is complete"
     )
 class MigrationObservation(Observation):
     """
@@ -60,6 +65,7 @@ class MigrationObservation(Observation):
         step_number: Current step count (0 after reset, increments each step).
         migration_progress: Current grader score from 0.0 to 1.0.
         task_name: Name of the current task being attempted.
     """
     current_schema_sql: str = Field(
@@ -88,6 +94,10 @@ class MigrationObservation(Observation):
         default="",
         description="Name of the current task"
     )
 class MigrationState(State):

 from typing import Any, Dict, Optional
 from openenv.core.env_server.types import Action, Observation, State
+from pydantic import Field, field_validator
 class MigrationAction(Action):
         description="Set to true when you believe the migration is complete"
     )
+    @field_validator("sql_command")
+    @classmethod
+    def strip_whitespace(cls, v: str) -> str:
+        return v.strip()
 class MigrationObservation(Observation):
     """
         step_number: Current step count (0 after reset, increments each step).
         migration_progress: Current grader score from 0.0 to 1.0.
         task_name: Name of the current task being attempted.
+        schema_diff: Human-readable diff between current and target schemas.
     """
     current_schema_sql: str = Field(
         default="",
         description="Name of the current task"
     )
+    schema_diff: Optional[str] = Field(
+        default=None,
+        description="Human-readable diff between current and expected target schemas"
+    )
 class MigrationState(State):

server/app.py CHANGED Viewed

@@ -128,6 +128,11 @@ async def list_tasks() -> Dict[str, Any]:
             "reasoning": "string -- Explanation of the action (optional)",
             "submit_final": "boolean -- Set true when migration is complete (default: false)",
         },
     }
@@ -170,6 +175,7 @@ async def grade_task(
             }
     return {
         "tasks": results,
         "status": "graded",
     }

             "reasoning": "string -- Explanation of the action (optional)",
             "submit_final": "boolean -- Set true when migration is complete (default: false)",
         },
+        "example_action": {
+            "sql_command": "CREATE TABLE ...",
+            "reasoning": "Creating the new destination table before copying data.",
+            "submit_final": False
+        }
     }
             }
     return {
+        "grader_version": "1.0",
         "tasks": results,
         "status": "graded",
     }

server/environment.py CHANGED Viewed

@@ -18,6 +18,7 @@ import re
 import sqlite3
 import threading
 import uuid
 from typing import Any, Dict, List, Optional
 # Support both in-repo and standalone imports
@@ -145,11 +146,23 @@ class DbMigrationEnvironment(Environment):
                 return None, "Error: Query exceeded execution time limit (possible infinite loop). Simplify your query."
             return None, str(e)
         except sqlite3.Warning as e:
-            return None, (
-                f"Error: SQLite requires one statement per step. "
-                f"Split your commands into separate steps. Original error: {e}"
-            )
         except Exception as e:
             return None, str(e)
         finally:
             self._conn.set_progress_handler(None, 0)
@@ -214,8 +227,11 @@ class DbMigrationEnvironment(Environment):
             self._conn = None
         # Create fresh in-memory database
-        self._conn = sqlite3.connect(":memory:")
         # CRITICAL: Enable foreign key enforcement
         self._conn.execute("PRAGMA foreign_keys = ON")
@@ -239,16 +255,28 @@ class DbMigrationEnvironment(Environment):
         # Compute initial score
         initial_score = self._reconciler.score(self._conn)
         return MigrationObservation(
             done=False,
             reward=0.0,
-            current_schema_sql=self._get_current_schema(),
-            target_schema_sql=self._task_config["target_ddl"],
             last_execution_result="Environment initialized. Ready for migration.",
             step_number=0,
             migration_progress=initial_score,
             task_name=self.task_name,
             metadata={"status": "ready"},
         )
@@ -289,7 +317,11 @@ class DbMigrationEnvironment(Environment):
         sql_command = action.sql_command.strip()
         # --- A3: Dangerous SQL Blacklist ---
-        if _DANGEROUS_PATTERNS.search(sql_command):
             execution_result = (
                 "Error: This SQL command is not allowed for security reasons. "
                 "ATTACH DATABASE, DETACH DATABASE, LOAD_EXTENSION, and "
@@ -342,9 +374,9 @@ class DbMigrationEnvironment(Environment):
                     if self._is_read_query(sql_command):
                         execution_result = self._format_query_results(cursor)
                     else:
-                        rows_affected = cursor.rowcount
-                        execution_result = f"Success: {rows_affected} rows affected"
-                        # Only auto-commit if not in explicit transaction (A4)
                         if not self._in_explicit_tx:
                             try:
                                 self._conn.commit()
@@ -384,19 +416,29 @@ class DbMigrationEnvironment(Environment):
         if done:
             meta["trajectory"] = self._trajectory
         return MigrationObservation(
             done=done,
             reward=step_reward,
-            current_schema_sql=self._get_current_schema(),
-            target_schema_sql=self._task_config["target_ddl"],
             last_execution_result=execution_result,
             step_number=self._step_count,
             migration_progress=current_score,
             task_name=self.task_name,
             metadata=meta,
         )
-    @property
     def state(self) -> MigrationState:
         """Get current environment state."""
         return self._state

 import sqlite3
 import threading
 import uuid
+import difflib
 from typing import Any, Dict, List, Optional
 # Support both in-repo and standalone imports
                 return None, "Error: Query exceeded execution time limit (possible infinite loop). Simplify your query."
             return None, str(e)
         except sqlite3.Warning as e:
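+            # Multi-statement fallback: retry the same SQL with executescript(); no cursor is returned on success.
+            # NOTE(review): executescript() may implicitly COMMIT a pending transaction first - confirm this is acceptable.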
+            try:
+                self._conn.executescript(sql)
+                return None, None
+            except Exception as script_e:
+                return None, f"Error (Multi-Statement Fallback Failed): {script_e}. Original error: {e}"
+        except sqlite3.OperationalError as e:
+            err_str = str(e).lower()
+            if "table" in err_str and "already exists" in err_str:
+                return None, f"Schema Error: {e}. You must DROP the old table first if replacing it."
+            if "has no column" in err_str:
+                return None, f"Schema Error: {e}. Check table columns."
+            return None, str(e)
         except Exception as e:
+            err_str = str(e).lower()
+            if "values for" in err_str and "columns" in err_str:
+                return None, f"Data Error: {e}. Ensure you are inserting the correct number of columns."
             return None, str(e)
         finally:
             self._conn.set_progress_handler(None, 0)
             self._conn = None
         # Create fresh in-memory database
+        self._conn = sqlite3.connect(":memory:", isolation_level=None)
+        # NOTE: an in-memory database already keeps its journal in memory, so this PRAGMA is effectively a no-op (no disk I/O is involved)
+        self._conn.execute("PRAGMA journal_mode = MEMORY")
         # CRITICAL: Enable foreign key enforcement
         self._conn.execute("PRAGMA foreign_keys = ON")
         # Compute initial score
         initial_score = self._reconciler.score(self._conn)
+        self._state.migration_progress = initial_score
+        current_ddl = self._get_current_schema()
+        target_ddl = self._task_config["target_ddl"]
+        diff = "\n".join(difflib.unified_diff(
+            current_ddl.splitlines(),
+            target_ddl.splitlines(),
+            fromfile="current_schema",
+            tofile="target_schema",
+            lineterm=""
+        ))
         return MigrationObservation(
             done=False,
             reward=0.0,
+            current_schema_sql=current_ddl,
+            target_schema_sql=target_ddl,
             last_execution_result="Environment initialized. Ready for migration.",
             step_number=0,
             migration_progress=initial_score,
             task_name=self.task_name,
+            schema_diff=diff if diff else "Schemas match exactly.",
             metadata={"status": "ready"},
         )
         sql_command = action.sql_command.strip()
         # --- A3: Dangerous SQL Blacklist ---
+        sql_lower = sql_command.lower()
+        if "pragma" in sql_lower and "foreign_keys" in sql_lower and "off" in sql_lower:
+            execution_result = "Security Error: Disabling PRAGMA foreign_keys is strictly explicitly forbidden."
+            action_error = "pragma_off_blocked"
+        elif _DANGEROUS_PATTERNS.search(sql_command):
             execution_result = (
                 "Error: This SQL command is not allowed for security reasons. "
                 "ATTACH DATABASE, DETACH DATABASE, LOAD_EXTENSION, and "
                     if self._is_read_query(sql_command):
                         execution_result = self._format_query_results(cursor)
                     else:
+                        rows_affected = getattr(cursor, "rowcount", -1) if cursor else -1
+                        execution_result = f"Success: Action executed. Rows affected: {rows_affected}"
+                        # Auto-commit only when not inside an explicit transaction
                         if not self._in_explicit_tx:
                             try:
                                 self._conn.commit()
         if done:
             meta["trajectory"] = self._trajectory
+        current_ddl = self._get_current_schema()
+        target_ddl = self._task_config["target_ddl"]
+        diff = "\n".join(difflib.unified_diff(
+            current_ddl.splitlines(),
+            target_ddl.splitlines(),
+            fromfile="current_schema",
+            tofile="target_schema",
+            lineterm=""
+        ))
         return MigrationObservation(
             done=done,
             reward=step_reward,
+            current_schema_sql=current_ddl,
+            target_schema_sql=target_ddl,
             last_execution_result=execution_result,
             step_number=self._step_count,
             migration_progress=current_score,
             task_name=self.task_name,
+            schema_diff=diff if diff else "Schemas match exactly.",
             metadata=meta,
         )
     def state(self) -> MigrationState:
         """Get current environment state."""
         return self._state