Spaces:
Sleeping
Sleeping
add metrics and a feature
Browse files- seeds.py +68 -43
- server/environment.py +12 -10
seeds.py
CHANGED
|
@@ -379,7 +379,7 @@ TASK5_TARGET_DDL = """CREATE TABLE products (
|
|
| 379 |
|
| 380 |
TASK5_EXPECTED_ROW_COUNT = 9
|
| 381 |
TASK5_EXPECTED_PRICE_SUM = round(12.50 + 9.99 + 45.00 + 32.50 + 5.99 + 99.99 + 67.00 + 23.50 + 11.00, 2)
|
| 382 |
-
|
| 383 |
|
| 384 |
|
| 385 |
def seed_task5(conn: sqlite3.Connection) -> None:
|
|
@@ -1044,6 +1044,59 @@ def golden_task7(conn: sqlite3.Connection) -> None:
|
|
| 1044 |
conn.commit()
|
| 1045 |
|
| 1046 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1047 |
# =============================================================================
|
| 1048 |
# Task Registry
|
| 1049 |
# =============================================================================
|
|
@@ -1061,13 +1114,7 @@ TASKS = {
|
|
| 1061 |
"seed_fn": seed_task4,
|
| 1062 |
"golden_fn": golden_task4,
|
| 1063 |
"target_ddl": TASK4_TARGET_DDL,
|
| 1064 |
-
"description": (
|
| 1065 |
-
"Restore deleted products from the deletion_log table back into the products table. "
|
| 1066 |
-
"Use product_id from deletion_log (NOT the log's id column) as the product's primary key. "
|
| 1067 |
-
"Add is_deleted and deleted_at columns. Original products: is_deleted=0, deleted_at=NULL. "
|
| 1068 |
-
"Restored products: is_deleted=1, deleted_at copied from log. "
|
| 1069 |
-
"Note: stock=0 on a product does NOT mean it was deleted."
|
| 1070 |
-
),
|
| 1071 |
"difficulty": "easy",
|
| 1072 |
"max_steps": 10,
|
| 1073 |
},
|
|
@@ -1075,11 +1122,7 @@ TASKS = {
|
|
| 1075 |
"seed_fn": seed_task2,
|
| 1076 |
"golden_fn": golden_task2,
|
| 1077 |
"target_ddl": TASK2_TARGET_DDL,
|
| 1078 |
-
"description": (
|
| 1079 |
-
"Decompose the flat purchases table into normalized customers and orders tables with a FK. "
|
| 1080 |
-
"customers should have DISTINCT entries by email. "
|
| 1081 |
-
"All 7 original purchases must be preserved as individual orders linked to the correct customer."
|
| 1082 |
-
),
|
| 1083 |
"difficulty": "medium",
|
| 1084 |
"max_steps": 15,
|
| 1085 |
},
|
|
@@ -1087,12 +1130,7 @@ TASKS = {
|
|
| 1087 |
"seed_fn": seed_task5,
|
| 1088 |
"golden_fn": golden_task5,
|
| 1089 |
"target_ddl": TASK5_TARGET_DDL,
|
| 1090 |
-
"description": (
|
| 1091 |
-
"Merge products_v1 and products_v2 into a single products table. "
|
| 1092 |
-
"v1 prices are stored as TEXT ('$XX.XX') — coerce to REAL. v2 uses 'unit_cost' — rename to 'price'. "
|
| 1093 |
-
"For ID conflicts (same ID in both tables), v2 values WIN for name/price. "
|
| 1094 |
-
"Set source='v1' for v1-only, 'v2' for v2-only, 'both' for conflicts."
|
| 1095 |
-
),
|
| 1096 |
"difficulty": "medium",
|
| 1097 |
"max_steps": 15,
|
| 1098 |
},
|
|
@@ -1100,12 +1138,7 @@ TASKS = {
|
|
| 1100 |
"seed_fn": seed_task6,
|
| 1101 |
"golden_fn": golden_task6,
|
| 1102 |
"target_ddl": TASK6_TARGET_DDL,
|
| 1103 |
-
"description":
|
| 1104 |
-
"Decompose the sales_records god-table into 3NF: salespersons, customers, products, sales, data_issues. "
|
| 1105 |
-
"Route records with empty string '' customer emails to data_issues (not just NULL). "
|
| 1106 |
-
"TRIM leading/trailing whitespace from all email addresses before inserting. "
|
| 1107 |
-
"Each sale must link to the correct salesperson, customer, and product via FKs."
|
| 1108 |
-
),
|
| 1109 |
"difficulty": "medium",
|
| 1110 |
"max_steps": 15,
|
| 1111 |
},
|
|
@@ -1113,13 +1146,7 @@ TASKS = {
|
|
| 1113 |
"seed_fn": seed_task3,
|
| 1114 |
"golden_fn": golden_task3,
|
| 1115 |
"target_ddl": TASK3_TARGET_DDL,
|
| 1116 |
-
"description":
|
| 1117 |
-
"Multi-table FK cascade with type coercion, NULL handling, and orphan audit logging. "
|
| 1118 |
-
"Convert salary from TEXT ('$90000') to INTEGER (90000) by stripping '$' and ','. "
|
| 1119 |
-
"Remove employees with NULL salary and log them to audit_log with reason='null_salary'. "
|
| 1120 |
-
"Remove orphaned assets (employee_id not in employees) and log them with reason='orphaned_record'. "
|
| 1121 |
-
"Enforce NOT NULL and FK constraints on all tables."
|
| 1122 |
-
),
|
| 1123 |
"difficulty": "hard",
|
| 1124 |
"max_steps": 20,
|
| 1125 |
},
|
|
@@ -1127,18 +1154,16 @@ TASKS = {
|
|
| 1127 |
"seed_fn": seed_task7,
|
| 1128 |
"golden_fn": golden_task7,
|
| 1129 |
"target_ddl": TASK7_TARGET_DDL,
|
| 1130 |
-
"description":
|
| 1131 |
-
"Merge 6 tables from Legacy CRM + Modern SaaS into 4 unified tables. "
|
| 1132 |
-
"Cross-system customer dedup: match by email address. Set source='both' for matches, "
|
| 1133 |
-
"'legacy' or 'modern' for unmatched. "
|
| 1134 |
-
"Tier mapping (modern subscription_tier): 1=free, 2=basic, 3=premium, 4=enterprise. "
|
| 1135 |
-
"Status mapping (modern tx_status): 1=pending, 2=processing, 3=complete, 4=failed, 5=refunded. "
|
| 1136 |
-
"Legacy amounts are TEXT ('$1,234.56') — coerce to REAL. NULL currency defaults to 'USD'. "
|
| 1137 |
-
"Log orphaned transactions (user_uuid not found) to migration_issues with issue_type='orphaned_record'. "
|
| 1138 |
-
"Log customers with NULL email to migration_issues with issue_type='null_email'."
|
| 1139 |
-
),
|
| 1140 |
"difficulty": "hard",
|
| 1141 |
"max_steps": 20,
|
| 1142 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1143 |
}
|
| 1144 |
-
|
|
|
|
| 379 |
|
| 380 |
TASK5_EXPECTED_ROW_COUNT = 9
|
| 381 |
TASK5_EXPECTED_PRICE_SUM = round(12.50 + 9.99 + 45.00 + 32.50 + 5.99 + 99.99 + 67.00 + 23.50 + 11.00, 2)
|
| 382 |
+
TASK5_EXPECTED_BOTH_SOURCE_COUNT = 2 # ids 1 and 2
|
| 383 |
|
| 384 |
|
| 385 |
def seed_task5(conn: sqlite3.Connection) -> None:
|
|
|
|
| 1044 |
conn.commit()
|
| 1045 |
|
| 1046 |
|
| 1047 |
+
# =============================================================================
|
| 1048 |
+
# TASK 8: Data Poisoning & Quarantine Routing (Extreme)
|
| 1049 |
+
# =============================================================================
|
| 1050 |
+
|
| 1051 |
+
# Target schema for Task 8. `inventory` holds rows whose price was coerced to a
# number; `inventory_quarantine` keeps the raw, un-coerced values of rejected
# rows together with the reason they were rejected.
# NOTE: the delimiters must be plain triple quotes -- a backslash-escaped
# delimiter outside a string is parsed as a line continuation and fails to compile.
TASK8_TARGET_DDL = """
CREATE TABLE inventory (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    price REAL NOT NULL,
    sku TEXT UNIQUE
);

CREATE TABLE inventory_quarantine (
    id INTEGER PRIMARY KEY,
    raw_name TEXT,
    raw_price TEXT,
    raw_sku TEXT,
    error_reason TEXT
);
""".strip()
|
| 1067 |
+
|
| 1068 |
+
def seed_task8(conn: sqlite3.Connection) -> None:
    """Create and populate the ``staging_data`` source table for Task 8.

    The ``cost`` column is deliberately stored as TEXT and mixes clean
    numbers, padded numbers, ``$``-prefixed numbers and non-numeric
    placeholders ("N/A", "INVALID") so a migration has to cleanse some rows
    and reject others.

    Args:
        conn: Open SQLite connection to seed. The table is created, filled
            and committed on this connection.
    """
    conn.execute(
        "CREATE TABLE staging_data (id INTEGER, item TEXT, cost TEXT, sku_code TEXT)"
    )
    # Whitespace and "$" inside the cost strings are part of the fixture --
    # do not normalise them.
    staging_rows = [
        (1, "Oscilloscope", "1500.00", "OSC-001"),
        (2, "Multimeter", " 75.50 ", "MUL-002"),
        (3, "Soldering Iron", "$45.00", "SLD-003"),
        (4, "Lead Solder", "N/A", "LSD-004"),
        (5, "DC Power Supply", "299.99", "PWR-005"),
        (6, "Unknown Device", "INVALID", "UNK-006"),
        (7, "Wire Strippers", "$ 12.50", "WRE-007"),
    ]
    conn.executemany("INSERT INTO staging_data VALUES (?,?,?,?)", staging_rows)
    conn.commit()
|
| 1081 |
+
|
| 1082 |
+
def _parse_task8_price(raw_cost):
    """Return *raw_cost* as a finite float, or raise ValueError if it cannot be coerced."""
    import math  # local import: keeps this block self-contained

    if raw_cost is None:
        raise ValueError("price is NULL")
    price = float(str(raw_cost).replace("$", "").strip())
    # float() happily parses "nan"/"inf"; neither is a usable price, and a NaN
    # would (as far as SQLite's NaN-to-NULL handling goes) break price NOT NULL.
    if not math.isfinite(price):
        raise ValueError(f"non-finite price: {raw_cost!r}")
    return price


def golden_task8(conn: sqlite3.Connection) -> None:
    """Reference solution for Task 8: split ``staging_data`` into clean and quarantined rows.

    Creates ``inventory`` and ``inventory_quarantine``, then walks
    ``staging_data``. A row whose ``cost`` parses as a finite number (after
    removing ``$`` and surrounding whitespace) is inserted into ``inventory``
    under its original id. Any other row is inserted into
    ``inventory_quarantine`` with its raw values and
    ``error_reason='invalid_numeric_format'``; quarantine ids are assigned by
    SQLite, not copied from the source row.

    Args:
        conn: Open SQLite connection that already contains ``staging_data``.
            The work is committed on this connection.
    """
    conn.execute("CREATE TABLE inventory (id INTEGER PRIMARY KEY, name TEXT NOT NULL, price REAL NOT NULL, sku TEXT UNIQUE)")
    conn.execute("CREATE TABLE inventory_quarantine (id INTEGER PRIMARY KEY, raw_name TEXT, raw_price TEXT, raw_sku TEXT, error_reason TEXT)")

    # Explicit rowid order makes the auto-assigned quarantine ids deterministic.
    staged = conn.execute(
        "SELECT id, item, cost, sku_code FROM staging_data ORDER BY rowid"
    ).fetchall()

    for row_id, name, raw_cost, sku in staged:
        try:
            price = _parse_task8_price(raw_cost)
        except ValueError:
            # Keep the untouched source values so the rejection can be audited.
            conn.execute(
                "INSERT INTO inventory_quarantine (raw_name, raw_price, raw_sku, error_reason) VALUES (?,?,?,?)",
                (name, raw_cost, sku, "invalid_numeric_format"),
            )
        else:
            conn.execute(
                "INSERT INTO inventory (id, name, price, sku) VALUES (?,?,?,?)",
                (row_id, name, price, sku),
            )
    conn.commit()
|
| 1099 |
+
|
| 1100 |
# =============================================================================
|
| 1101 |
# Task Registry
|
| 1102 |
# =============================================================================
|
|
|
|
| 1114 |
"seed_fn": seed_task4,
|
| 1115 |
"golden_fn": golden_task4,
|
| 1116 |
"target_ddl": TASK4_TARGET_DDL,
|
| 1117 |
+
"description": "Restore deleted products from the deletion_log table back into the products table. Use product_id from deletion_log (NOT the log's id column) as the primary key. Add is_deleted (1) and deleted_at values from log. Original rows stay as is_deleted=0, deleted_at=NULL.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1118 |
"difficulty": "easy",
|
| 1119 |
"max_steps": 10,
|
| 1120 |
},
|
|
|
|
| 1122 |
"seed_fn": seed_task2,
|
| 1123 |
"golden_fn": golden_task2,
|
| 1124 |
"target_ddl": TASK2_TARGET_DDL,
|
| 1125 |
+
"description": "Normalize a flat purchases table into customers and orders tables linked by customer_id (FK). Ensure customers are distinct by email.",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1126 |
"difficulty": "medium",
|
| 1127 |
"max_steps": 15,
|
| 1128 |
},
|
|
|
|
| 1130 |
"seed_fn": seed_task5,
|
| 1131 |
"golden_fn": golden_task5,
|
| 1132 |
"target_ddl": TASK5_TARGET_DDL,
|
| 1133 |
+
"description": "Merge products_v1 (Legacy) and products_v2 (Modern) with ID collision logic: Modern (v2) wins. Coerce v1 price strings ($) to REAL.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1134 |
"difficulty": "medium",
|
| 1135 |
"max_steps": 15,
|
| 1136 |
},
|
|
|
|
| 1138 |
"seed_fn": seed_task6,
|
| 1139 |
"golden_fn": golden_task6,
|
| 1140 |
"target_ddl": TASK6_TARGET_DDL,
|
| 1141 |
+
"description": "Decompose sales_records into 3NF: salespersons, customers, products, and sales. Route rows with missing emails to data_issues.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1142 |
"difficulty": "medium",
|
| 1143 |
"max_steps": 15,
|
| 1144 |
},
|
|
|
|
| 1146 |
"seed_fn": seed_task3,
|
| 1147 |
"golden_fn": golden_task3,
|
| 1148 |
"target_ddl": TASK3_TARGET_DDL,
|
| 1149 |
+
"description": "Multi-table FK cascade with type coercion for salary and orphan logging for assets.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1150 |
"difficulty": "hard",
|
| 1151 |
"max_steps": 20,
|
| 1152 |
},
|
|
|
|
| 1154 |
"seed_fn": seed_task7,
|
| 1155 |
"golden_fn": golden_task7,
|
| 1156 |
"target_ddl": TASK7_TARGET_DDL,
|
| 1157 |
+
"description": "Consolidate Legacy CRM and Modern SaaS data with cross-system email deduping and complex state/type mapping.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1158 |
"difficulty": "hard",
|
| 1159 |
"max_steps": 20,
|
| 1160 |
},
|
| 1161 |
+
"data-poisoning-quarantine": {
|
| 1162 |
+
"seed_fn": seed_task8,
|
| 1163 |
+
"golden_fn": golden_task8,
|
| 1164 |
+
"target_ddl": TASK8_TARGET_DDL,
|
| 1165 |
+
"description": "The ultimate technical test: Migrate inventory from a 'poisoned' staging table. Cleanse raw price strings and route un-coerceable rows (like 'N/A') to a quarantine table while maintaining strict schema integrity.",
|
| 1166 |
+
"difficulty": "extreme",
|
| 1167 |
+
"max_steps": 15,
|
| 1168 |
+
},
|
| 1169 |
}
|
|
|
server/environment.py
CHANGED
|
@@ -178,28 +178,28 @@ class DbMigrationEnvironment(Environment):
|
|
| 178 |
self._conn.set_progress_handler(_progress_callback, 1000)
|
| 179 |
try:
|
| 180 |
cursor = self._conn.execute(sql)
|
| 181 |
-
return cursor, None
|
| 182 |
except sqlite3.OperationalError as e:
|
| 183 |
err_str = str(e).lower()
|
| 184 |
if "interrupted" in err_str or ops_count[0] > _MAX_OPS:
|
| 185 |
-
return None, "Error: Query exceeded execution time limit (possible infinite loop). Simplify your query."
|
| 186 |
if "table" in err_str and "already exists" in err_str:
|
| 187 |
-
return None, f"Schema Error: {e}. You must DROP the old table first if replacing it."
|
| 188 |
if "has no column" in err_str:
|
| 189 |
-
return None, f"Schema Error: {e}. Check table columns."
|
| 190 |
-
return None, str(e)
|
| 191 |
except sqlite3.Warning as e:
|
| 192 |
# Multi-statement fallback
|
| 193 |
try:
|
| 194 |
self._conn.executescript(sql)
|
| 195 |
-
return None, None
|
| 196 |
except Exception as script_e:
|
| 197 |
-
return None, f"Error (Multi-Statement Fallback Failed): {script_e}. Original error: {e}"
|
| 198 |
except Exception as e:
|
| 199 |
err_str = str(e).lower()
|
| 200 |
if "values for" in err_str and "columns" in err_str:
|
| 201 |
-
return None, f"Data Error: {e}. Ensure you are inserting the correct number of columns."
|
| 202 |
-
return None, str(e)
|
| 203 |
finally:
|
| 204 |
self._conn.set_progress_handler(None, 0)
|
| 205 |
|
|
@@ -404,7 +404,7 @@ class DbMigrationEnvironment(Environment):
|
|
| 404 |
self._in_explicit_tx = False
|
| 405 |
else:
|
| 406 |
# --- Normal SQL execution with timeout (A1, A2) ---
|
| 407 |
-
cursor, error = self._execute_with_timeout(sql_command)
|
| 408 |
|
| 409 |
if error:
|
| 410 |
execution_result = error
|
|
@@ -452,11 +452,13 @@ class DbMigrationEnvironment(Environment):
|
|
| 452 |
|
| 453 |
# Build metadata with reasoning and debug info
|
| 454 |
execution_ms = int((time.time() - start_time) * 1000)
|
|
|
|
| 455 |
meta = {
|
| 456 |
"reasoning": action.reasoning,
|
| 457 |
"sql_executed": action.sql_command,
|
| 458 |
"step": self._step_count,
|
| 459 |
"execution_ms": execution_ms,
|
|
|
|
| 460 |
}
|
| 461 |
if action_error:
|
| 462 |
meta["error"] = action_error
|
|
|
|
| 178 |
self._conn.set_progress_handler(_progress_callback, 1000)
|
| 179 |
try:
|
| 180 |
cursor = self._conn.execute(sql)
|
| 181 |
+
return cursor, None, ops_count[0]
|
| 182 |
except sqlite3.OperationalError as e:
|
| 183 |
err_str = str(e).lower()
|
| 184 |
if "interrupted" in err_str or ops_count[0] > _MAX_OPS:
|
| 185 |
+
return None, "Error: Query exceeded execution time limit (possible infinite loop). Simplify your query.", ops_count[0]
|
| 186 |
if "table" in err_str and "already exists" in err_str:
|
| 187 |
+
return None, f"Schema Error: {e}. You must DROP the old table first if replacing it.", ops_count[0]
|
| 188 |
if "has no column" in err_str:
|
| 189 |
+
return None, f"Schema Error: {e}. Check table columns.", ops_count[0]
|
| 190 |
+
return None, str(e), ops_count[0]
|
| 191 |
except sqlite3.Warning as e:
|
| 192 |
# Multi-statement fallback
|
| 193 |
try:
|
| 194 |
self._conn.executescript(sql)
|
| 195 |
+
return None, None, ops_count[0]
|
| 196 |
except Exception as script_e:
|
| 197 |
+
return None, f"Error (Multi-Statement Fallback Failed): {script_e}. Original error: {e}", ops_count[0]
|
| 198 |
except Exception as e:
|
| 199 |
err_str = str(e).lower()
|
| 200 |
if "values for" in err_str and "columns" in err_str:
|
| 201 |
+
return None, f"Data Error: {e}. Ensure you are inserting the correct number of columns.", ops_count[0]
|
| 202 |
+
return None, str(e), ops_count[0]
|
| 203 |
finally:
|
| 204 |
self._conn.set_progress_handler(None, 0)
|
| 205 |
|
|
|
|
| 404 |
self._in_explicit_tx = False
|
| 405 |
else:
|
| 406 |
# --- Normal SQL execution with timeout (A1, A2) ---
|
| 407 |
+
cursor, error, q_ops = self._execute_with_timeout(sql_command)
|
| 408 |
|
| 409 |
if error:
|
| 410 |
execution_result = error
|
|
|
|
| 452 |
|
| 453 |
# Build metadata with reasoning and debug info
|
| 454 |
execution_ms = int((time.time() - start_time) * 1000)
|
| 455 |
+
q_ops = q_ops if 'q_ops' in locals() else 0
|
| 456 |
meta = {
|
| 457 |
"reasoning": action.reasoning,
|
| 458 |
"sql_executed": action.sql_command,
|
| 459 |
"step": self._step_count,
|
| 460 |
"execution_ms": execution_ms,
|
| 461 |
+
"query_ops": q_ops,
|
| 462 |
}
|
| 463 |
if action_error:
|
| 464 |
meta["error"] = action_error
|