Spaces:

junaid0600
/

sql-db-engineer-agent

Sleeping

App Files Files Community

junaid0600 commited on Apr 20

Commit

8cb206e

1 Parent(s): 1eef47f

Round 2: SQL Database Engineer Agent - 24/24 tests passing

Browse files

Files changed (15) hide show

api/server.py +112 -58
blog/mini_blog.md +0 -0
dataset/easy_scenarios.json +92 -0
dataset/hard_scenarios.json +185 -0
dataset/medium_scenarios.json +137 -0
env/__pycache__/models.cpython-312.pyc +0 -0
env/environment.py +195 -130
env/models.py +93 -58
env/reward.py +203 -94
env/tasks.py +178 -63
tests/test_environment.py +3 -4
tests/test_graders.py +2 -2
training/evaluate_agent.py +0 -0
training/generate_training_data.py +0 -0
training/train_agent.py +0 -0

api/server.py CHANGED Viewed

@@ -16,7 +16,7 @@ from env.models import (
     StepResponse, ResetResponse, TaskListResponse,
     BaselineResponse, BaselineResult,
     GraderRequest, GraderResponse,
-    HealthResponse, TaskInfo
 )
 from env.tasks import task_manager, ACTION_SCHEMA
 from env.graders import grade
@@ -33,18 +33,21 @@ async def lifespan(app: FastAPI):
     environment.reset(difficulty="easy")
     yield
 # ─────────────────────────────────────────────
 #  APP DEFINITION
 # ─────────────────────────────────────────────
 app = FastAPI(
-    title       = "SQL Query Debugger — OpenEnv Environment",
     description = (
         "An OpenEnv-compliant reinforcement learning environment where AI agents "
-        "learn to debug SQL queries across syntax errors, logic bugs, and performance issues. "
-        "Built for the META x PyTorch x SST OpenEnv Hackathon."
     ),
-    version     = "1.0.0",
     lifespan    = lifespan,
     docs_url    = "/docs",
     redoc_url   = "/redoc",
@@ -72,12 +75,11 @@ async def global_exception_handler(request: Request, exc: Exception):
 # ─────────────────────────────────────────────
-#  FAVICON — fix 404
 # ─────────────────────────────────────────────
 @app.get("/favicon.ico", include_in_schema=False)
 async def favicon():
-    """Returns 204 No Content instead of 404 for favicon requests."""
     return Response(status_code=204)
@@ -87,10 +89,10 @@ async def favicon():
 @app.get("/health", response_model=HealthResponse, tags=["System"])
 async def health():
-    """Liveness check. Always returns 200. Used by HF Space health monitoring."""
     return HealthResponse(
         status  = "ok",
-        version = "1.0.0",
         uptime  = round(time.time() - _startup_time, 2)
     )
@@ -106,8 +108,8 @@ class ResetBody(BaseModel):
 @app.post("/reset", response_model=Observation, tags=["Environment"])
 async def reset(body: ResetBody = ResetBody()):
     """
-    Starts a fresh episode. Returns the initial Observation the agent sees.
-    Edge case: always returns valid Observation even if dataset issues occur.
     """
     try:
         obs = environment.reset(
@@ -129,8 +131,9 @@ async def reset(body: ResetBody = ResetBody()):
 async def step(action: Action):
     """
     Submits an action to the environment.
-    Returns (observation, reward, done, info).
-    Edge cases: null action, malformed payload, episode already done.
     """
     try:
         response = environment.step(action)
@@ -140,8 +143,8 @@ async def step(action: Action):
         return StepResponse(
             observation = environment._build_observation(),
             reward      = Reward(
-                score     = -0.1,
-                breakdown = {"validation_error": -0.1},
                 feedback  = f"Malformed action: {str(e)}"
             ),
             done = False,
@@ -157,11 +160,7 @@ async def step(action: Action):
 @app.get("/state", response_model=EpisodeState, tags=["Environment"])
 async def state():
-    """
-    Returns full current environment state.
-    Works before reset() is called — returns default empty state.
-    Always JSON-serializable. Never crashes.
-    """
     return environment.state()
@@ -172,8 +171,8 @@ async def state():
 @app.get("/tasks", response_model=TaskListResponse, tags=["Tasks"])
 async def tasks():
     """
-    Lists all 15 tasks with full action schema definitions.
-    Validator checks for action field definitions, not just task names.
     """
     all_tasks = task_manager.list_all_tasks()
     return TaskListResponse(
@@ -191,8 +190,8 @@ async def tasks():
 async def grader(request: GraderRequest):
     """
     Grades a completed episode action.
     Returns float score strictly between 0.0 and 1.0 exclusive.
-    Never crashes.
     """
     try:
         if request.action is None:
@@ -201,14 +200,42 @@ async def grader(request: GraderRequest):
                 feedback  = "No action provided for grading.",
                 breakdown = {"error": "null_action"}
             )
         score, breakdown, feedback = grade(request.action, request.task_id)
-        # Clamp strictly between 0 and 1 exclusive
         score = max(0.001, min(0.999, score))
-        return GraderResponse(
-            score     = score,
-            feedback  = feedback,
-            breakdown = breakdown
-        )
     except Exception as e:
         return GraderResponse(
             score     = 0.001,
@@ -216,6 +243,7 @@ async def grader(request: GraderRequest):
             breakdown = {"error": str(e)}
         )
 # ─────────────────────────────────────────────
 #  7. /baseline — POST
 # ─────────────────────────────────────────────
@@ -223,9 +251,8 @@ async def grader(request: GraderRequest):
 @app.post("/baseline", response_model=BaselineResponse, tags=["Baseline"])
 async def baseline():
     """
-    Runs the baseline agent against all 3 difficulty levels.
-    Returns scores JSON. Must complete within 60 seconds.
-    Edge case: OPENAI_API_KEY not set → continues with rule-based agent.
     """
     try:
         import baseline as baseline_module
@@ -236,45 +263,72 @@ async def baseline():
         return results
     except asyncio.TimeoutError:
         return BaselineResponse(
-            results=[
-                BaselineResult(
-                    task_id    = "timeout",
-                    difficulty = DifficultyLevel.EASY,
-                    score      = 0.0,
-                    steps      = 0,
-                    feedback   = "Baseline timed out after 55 seconds."
-                )
-            ],
             average_score=0.0
         )
     except Exception as e:
         return BaselineResponse(
-            results=[
-                BaselineResult(
-                    task_id    = "error",
-                    difficulty = DifficultyLevel.EASY,
-                    score      = 0.0,
-                    steps      = 0,
-                    feedback   = f"Baseline error: {str(e)}"
-                )
-            ],
             average_score=0.0
         )
 # ─────────────────────────────────────────────
-#  ROOT — project info
 # ─────────────────────────────────────────────
 @app.get("/", tags=["System"])
 async def root():
     return {
-        "name":        "SQL Query Debugger — OpenEnv Environment",
-        "version":     "1.0.0",
         "docs":        "/docs",
         "health":      "/health",
-        "endpoints":   ["/reset", "/step", "/state", "/tasks", "/grader", "/baseline", "/health"],
-        "hackathon":   "META x PyTorch x SST OpenEnv Hackathon",
-        "domain":      "SQL Query Debugging",
-        "tasks_count": 15,
-    }

     StepResponse, ResetResponse, TaskListResponse,
     BaselineResponse, BaselineResult,
     GraderRequest, GraderResponse,
+    HealthResponse, TaskInfo, ProgressResponse
 )
 from env.tasks import task_manager, ACTION_SCHEMA
 from env.graders import grade
     environment.reset(difficulty="easy")
     yield
 # ─────────────────────────────────────────────
 #  APP DEFINITION
 # ─────────────────────────────────────────────
 app = FastAPI(
+    title       = "SQL Database Engineer Agent — OpenEnv Environment",
     description = (
         "An OpenEnv-compliant reinforcement learning environment where AI agents "
+        "learn to act like senior database engineers. "
+        "The agent manages a simulated production database over 50+ steps: "
+        "inspecting slow queries, creating indexes, rewriting queries, partitioning tables. "
+        "Built for the META x PyTorch x SST OpenEnv Hackathon Finals — April 25-26, Bangalore."
     ),
+    version     = "2.0.0",
     lifespan    = lifespan,
     docs_url    = "/docs",
     redoc_url   = "/redoc",
 # ─────────────────────────────────────────────
+#  FAVICON
 # ─────────────────────────────────────────────
 @app.get("/favicon.ico", include_in_schema=False)
 async def favicon():
     return Response(status_code=204)
 @app.get("/health", response_model=HealthResponse, tags=["System"])
 async def health():
+    """Liveness check. Always returns 200."""
     return HealthResponse(
         status  = "ok",
+        version = "2.0.0",
         uptime  = round(time.time() - _startup_time, 2)
     )
 @app.post("/reset", response_model=Observation, tags=["Environment"])
 async def reset(body: ResetBody = ResetBody()):
     """
+    Starts a fresh episode. Initializes DatabaseSimulator.
+    Returns the initial Observation with DB state and slow queries.
     """
     try:
         obs = environment.reset(
 async def step(action: Action):
     """
     Submits an action to the environment.
+    Round 2 actions: inspect_query, create_index, rewrite_query,
+    partition_table, analyze_statistics, analyze_indexes, submit_report.
+    Returns (observation, reward, done, info) with DB performance delta.
     """
     try:
         response = environment.step(action)
         return StepResponse(
             observation = environment._build_observation(),
             reward      = Reward(
+                score     = 0.001,
+                breakdown = {"validation_error": 0.001},
                 feedback  = f"Malformed action: {str(e)}"
             ),
             done = False,
 @app.get("/state", response_model=EpisodeState, tags=["Environment"])
 async def state():
+    """Returns full current environment state including performance history."""
     return environment.state()
 @app.get("/tasks", response_model=TaskListResponse, tags=["Tasks"])
 async def tasks():
     """
+    Lists all 30 tasks (15 Round 2 scenarios + 15 Round 1 cases).
+    Includes complete action schema for all 15 action types.
     """
     all_tasks = task_manager.list_all_tasks()
     return TaskListResponse(
 async def grader(request: GraderRequest):
     """
     Grades a completed episode action.
+    For Round 2 submit_report: computes score from DB performance improvement.
     Returns float score strictly between 0.0 and 1.0 exclusive.
     """
     try:
         if request.action is None:
                 feedback  = "No action provided for grading.",
                 breakdown = {"error": "null_action"}
             )
+        # Round 2: submit_report grading uses DB state
+        if request.action.action_type == ActionType.SUBMIT_REPORT:
+            ep_state    = environment.state()
+            perf_history = ep_state.action_counts.get("_perf_history", [0.0])
+            baseline     = ep_state.action_counts.get("_baseline_score", 0.0)
+            best_score   = ep_state.action_counts.get("_best_score", 0.0)
+            current      = perf_history[-1] if perf_history else 0.0
+            max_possible = max(1.0, 100.0 - baseline)
+            perf_improvement = (current - baseline) / max_possible
+            step_efficiency  = 1.0 - (ep_state.step_count / max(1, 50))
+            score = round(
+                (perf_improvement * 0.60) + (step_efficiency * 0.20) + 0.10, 4
+            )
+            score = max(0.001, min(0.999, score))
+            return GraderResponse(
+                score    = score,
+                feedback = (
+                    f"DB performance: {baseline:.1f} → {current:.1f} "
+                    f"(best: {best_score:.1f}). "
+                    f"Steps used: {ep_state.step_count}/50."
+                ),
+                breakdown = {
+                    "perf_improvement": round(perf_improvement, 4),
+                    "step_efficiency":  round(step_efficiency, 4),
+                    "base_score":       0.10,
+                }
+            )
+        # Round 1 grading
         score, breakdown, feedback = grade(request.action, request.task_id)
         score = max(0.001, min(0.999, score))
+        return GraderResponse(score=score, feedback=feedback, breakdown=breakdown)
     except Exception as e:
         return GraderResponse(
             score     = 0.001,
             breakdown = {"error": str(e)}
         )
 # ─────────────────────────────────────────────
 #  7. /baseline — POST
 # ─────────────────────────────────────────────
 @app.post("/baseline", response_model=BaselineResponse, tags=["Baseline"])
 async def baseline():
     """
+    Runs the baseline agent against all difficulty levels.
+    Must complete within 60 seconds.
     """
     try:
         import baseline as baseline_module
         return results
     except asyncio.TimeoutError:
         return BaselineResponse(
+            results=[BaselineResult(
+                task_id="timeout", difficulty=DifficultyLevel.EASY,
+                score=0.0, steps=0, feedback="Baseline timed out."
+            )],
             average_score=0.0
         )
     except Exception as e:
         return BaselineResponse(
+            results=[BaselineResult(
+                task_id="error", difficulty=DifficultyLevel.EASY,
+                score=0.0, steps=0, feedback=f"Baseline error: {str(e)}"
+            )],
             average_score=0.0
         )
 # ─────────────────────────────────────────────
+#  8. /progress — GET  (Round 2 NEW)
+# ───────────────────────────────────────���─────
+@app.get("/progress", response_model=ProgressResponse, tags=["Training"])
+async def progress():
+    """
+    Returns DB performance history for training visualization.
+    Used by evaluate_agent.py to generate reward curves.
+    Shows improvement from baseline to current score.
+    """
+    ep_state     = environment.state()
+    ac           = ep_state.action_counts
+    perf_history = ac.get("_perf_history", [])
+    milestones   = ac.get("_milestones", [])
+    baseline     = ac.get("_baseline_score", 0.0)
+    target       = ac.get("_target_score", 85.0)
+    best         = ac.get("_best_score", 0.0)
+    current      = perf_history[-1] if perf_history else 0.0
+    return ProgressResponse(
+        scenario_id         = ep_state.task_id,
+        performance_score   = current,
+        baseline_score      = baseline,
+        target_score        = target,
+        improvement_history = perf_history,
+        milestones_earned   = milestones,
+        best_score          = best,
+        steps_used          = ep_state.step_count,
+        budget_remaining    = max(0, 50 - ep_state.step_count),
+        total_reward        = ep_state.total_reward,
+    )
+# ─────────────────────────────────────────────
+#  ROOT
 # ─────────────────────────────────────────────
 @app.get("/", tags=["System"])
 async def root():
     return {
+        "name":        "SQL Database Engineer Agent — OpenEnv Environment",
+        "version":     "2.0.0",
+        "tagline":     "Training LLMs to act like senior database engineers",
         "docs":        "/docs",
         "health":      "/health",
+        "endpoints":   ["/reset", "/step", "/state", "/tasks", "/grader", "/baseline", "/progress", "/health"],
+        "hackathon":   "META x PyTorch x SST OpenEnv Hackathon — Finals April 25-26 Bangalore",
+        "domain":      "Long-Horizon Database Engineering",
+        "tasks_count": 30,
+        "max_steps":   50,
+        "themes":      ["Long-Horizon Planning", "World Modeling", "Self-Improvement", "Wildcard"],
+    }

blog/mini_blog.md ADDED Viewed

File without changes

dataset/easy_scenarios.json ADDED Viewed

	@@ -0,0 +1,92 @@

+[
+  {
+    "id": "easy_s001",
+    "description": "User lookup query taking 2s on 10K users table. Missing index on email column.",
+    "tables": [
+      {"name": "users", "rows": 10000, "indexes": ["PRIMARY"], "size_mb": 8}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM users WHERE email=?", "avg_ms": 2000, "main_table": "users", "rows_examined": 10000}
+    ],
+    "missing_index_hints": [
+      {"table": "users", "columns": ["email"], "reason": "email is used in WHERE clause but has no index"}
+    ],
+    "performance_score_baseline": 8.0,
+    "target_score": 80.0,
+    "max_steps": 15,
+    "optimal_actions": ["inspect_query:q1", "analyze_indexes:users", "create_index:users:email", "submit_report"],
+    "category": "indexing"
+  },
+  {
+    "id": "easy_s002",
+    "description": "Order status query scanning 50K orders. Composite index on user_id + status needed.",
+    "tables": [
+      {"name": "orders", "rows": 50000, "indexes": ["PRIMARY"], "size_mb": 120}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM orders WHERE user_id=? AND status=?", "avg_ms": 3500, "main_table": "orders", "rows_examined": 50000}
+    ],
+    "missing_index_hints": [
+      {"table": "orders", "columns": ["user_id", "status"], "reason": "Composite WHERE clause needs composite index"}
+    ],
+    "performance_score_baseline": 5.0,
+    "target_score": 85.0,
+    "max_steps": 15,
+    "optimal_actions": ["inspect_query:q1", "create_index:orders:user_id,status", "submit_report"],
+    "category": "indexing"
+  },
+  {
+    "id": "easy_s003",
+    "description": "Product search query doing full table scan on 20K products. Index on name column fixes it.",
+    "tables": [
+      {"name": "products", "rows": 20000, "indexes": ["PRIMARY"], "size_mb": 35}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT id, name, price FROM products WHERE name LIKE ?", "avg_ms": 1800, "main_table": "products", "rows_examined": 20000}
+    ],
+    "missing_index_hints": [
+      {"table": "products", "columns": ["name"], "reason": "LIKE queries benefit from index on name"}
+    ],
+    "performance_score_baseline": 10.0,
+    "target_score": 78.0,
+    "max_steps": 15,
+    "optimal_actions": ["inspect_query:q1", "create_index:products:name", "submit_report"],
+    "category": "indexing"
+  },
+  {
+    "id": "easy_s004",
+    "description": "Session lookup hitting 15K sessions table without index. Single index solves it.",
+    "tables": [
+      {"name": "sessions", "rows": 15000, "indexes": ["PRIMARY"], "size_mb": 12}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM sessions WHERE user_id=? AND expires_at > NOW()", "avg_ms": 1500, "main_table": "sessions", "rows_examined": 15000}
+    ],
+    "missing_index_hints": [
+      {"table": "sessions", "columns": ["user_id", "expires_at"], "reason": "Composite index on user_id + expires_at needed"}
+    ],
+    "performance_score_baseline": 12.0,
+    "target_score": 80.0,
+    "max_steps": 15,
+    "optimal_actions": ["inspect_query:q1", "create_index:sessions:user_id,expires_at", "submit_report"],
+    "category": "indexing"
+  },
+  {
+    "id": "easy_s005",
+    "description": "Log table growing to 30K entries. Query filtering by level and created_at is slow.",
+    "tables": [
+      {"name": "logs", "rows": 30000, "indexes": ["PRIMARY"], "size_mb": 50}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM logs WHERE level=? AND created_at > ?", "avg_ms": 2200, "main_table": "logs", "rows_examined": 30000}
+    ],
+    "missing_index_hints": [
+      {"table": "logs", "columns": ["level", "created_at"], "reason": "Compound filter needs compound index"}
+    ],
+    "performance_score_baseline": 7.8,
+    "target_score": 80.0,
+    "max_steps": 15,
+    "optimal_actions": ["inspect_query:q1", "create_index:logs:level,created_at", "submit_report"],
+    "category": "indexing"
+  }
+]

dataset/hard_scenarios.json ADDED Viewed

	@@ -0,0 +1,185 @@

+[
+  {
+    "id": "hard_s001",
+    "description": "Financial DB: 500K transactions across 4 tables. 3 slow queries. Needs indexes, partition, and statistics.",
+    "tables": [
+      {"name": "transactions",  "rows": 500000, "indexes": ["PRIMARY"], "size_mb": 2400},
+      {"name": "accounts",      "rows": 50000,  "indexes": ["PRIMARY"], "size_mb": 80},
+      {"name": "customers",     "rows": 80000,  "indexes": ["PRIMARY"], "size_mb": 120},
+      {"name": "audit_log",     "rows": 1000000,"indexes": ["PRIMARY"], "size_mb": 5000}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM transactions WHERE account_id=? AND status=? AND created_at > ?", "avg_ms": 15000, "main_table": "transactions", "rows_examined": 500000},
+      {"id": "q2", "sql": "SELECT c.*, COUNT(t.id) FROM customers c, transactions t WHERE c.id = t.customer_id AND t.amount > ? GROUP BY c.id", "avg_ms": 22000, "main_table": "transactions", "rows_examined": 500000},
+      {"id": "q3", "sql": "SELECT * FROM audit_log WHERE entity_id=? AND entity_type=? ORDER BY created_at DESC LIMIT 100", "avg_ms": 18000, "main_table": "audit_log", "rows_examined": 1000000}
+    ],
+    "missing_index_hints": [
+      {"table": "transactions", "columns": ["account_id", "status", "created_at"], "reason": "Composite filter — high cardinality"},
+      {"table": "transactions", "columns": ["customer_id", "amount"], "reason": "JOIN + range filter"},
+      {"table": "audit_log",    "columns": ["entity_id", "entity_type", "created_at"], "reason": "Lookup + ORDER BY on huge table"}
+    ],
+    "performance_score_baseline": 4.2,
+    "target_score": 70.0,
+    "max_steps": 50,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2", "inspect_query:q3",
+      "analyze_indexes:transactions", "analyze_indexes:audit_log",
+      "create_index:transactions:account_id,status,created_at",
+      "create_index:transactions:customer_id,amount",
+      "create_index:audit_log:entity_id,entity_type,created_at",
+      "rewrite_query:q2:SELECT c.id, c.name, COUNT(t.id) as tx_count FROM customers c INNER JOIN transactions t ON c.id = t.customer_id WHERE t.amount > ? GROUP BY c.id, c.name",
+      "partition_table:audit_log",
+      "analyze_statistics:transactions",
+      "analyze_statistics:audit_log",
+      "submit_report"
+    ],
+    "category": "financial"
+  },
+  {
+    "id": "hard_s002",
+    "description": "SaaS platform: 8-table schema, 200K+ records. Dashboard queries taking 20s+. Full optimization campaign.",
+    "tables": [
+      {"name": "workspaces",    "rows": 5000,   "indexes": ["PRIMARY"], "size_mb": 10},
+      {"name": "users",         "rows": 80000,  "indexes": ["PRIMARY"], "size_mb": 120},
+      {"name": "projects",      "rows": 200000, "indexes": ["PRIMARY"], "size_mb": 450},
+      {"name": "tasks",         "rows": 800000, "indexes": ["PRIMARY"], "size_mb": 3000},
+      {"name": "comments",      "rows": 500000, "indexes": ["PRIMARY"], "size_mb": 1800},
+      {"name": "attachments",   "rows": 300000, "indexes": ["PRIMARY"], "size_mb": 900},
+      {"name": "activity_log",  "rows": 2000000,"indexes": ["PRIMARY"], "size_mb": 8000},
+      {"name": "notifications", "rows": 400000, "indexes": ["PRIMARY"], "size_mb": 600}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM tasks WHERE project_id=? AND assignee_id=? AND status != 'done' ORDER BY due_date ASC", "avg_ms": 20000, "main_table": "tasks", "rows_examined": 800000},
+      {"id": "q2", "sql": "SELECT * FROM activity_log WHERE workspace_id=? AND created_at > ? ORDER BY created_at DESC LIMIT 50", "avg_ms": 25000, "main_table": "activity_log", "rows_examined": 2000000},
+      {"id": "q3", "sql": "SELECT * FROM notifications WHERE user_id=? AND read=0", "avg_ms": 8000, "main_table": "notifications", "rows_examined": 400000}
+    ],
+    "missing_index_hints": [
+      {"table": "tasks",        "columns": ["project_id", "assignee_id", "status", "due_date"], "reason": "4-column filter + ORDER BY"},
+      {"table": "activity_log", "columns": ["workspace_id", "created_at"], "reason": "Range query on 2M row table — also partition candidate"},
+      {"table": "notifications","columns": ["user_id", "read"], "reason": "Hot path — unread notifications per user"}
+    ],
+    "performance_score_baseline": 3.8,
+    "target_score": 68.0,
+    "max_steps": 50,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2", "inspect_query:q3",
+      "analyze_indexes:tasks", "analyze_indexes:activity_log",
+      "create_index:tasks:project_id,assignee_id,status,due_date",
+      "create_index:activity_log:workspace_id,created_at",
+      "create_index:notifications:user_id,read",
+      "partition_table:activity_log",
+      "analyze_statistics:tasks",
+      "analyze_statistics:activity_log",
+      "submit_report"
+    ],
+    "category": "saas_platform"
+  },
+  {
+    "id": "hard_s003",
+    "description": "Healthcare DB: 1M patient records. Compliance queries + clinical search + audit trail all slow.",
+    "tables": [
+      {"name": "patients",        "rows": 1000000, "indexes": ["PRIMARY"], "size_mb": 4000},
+      {"name": "appointments",    "rows": 500000,  "indexes": ["PRIMARY"], "size_mb": 1500},
+      {"name": "prescriptions",   "rows": 800000,  "indexes": ["PRIMARY"], "size_mb": 2500},
+      {"name": "clinical_notes",  "rows": 1200000, "indexes": ["PRIMARY"], "size_mb": 6000}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM appointments WHERE patient_id=? AND doctor_id=? AND appointment_date BETWEEN ? AND ?", "avg_ms": 18000, "main_table": "appointments", "rows_examined": 500000},
+      {"id": "q2", "sql": "SELECT * FROM prescriptions WHERE patient_id=? AND medication_code=? AND prescribed_at > ?", "avg_ms": 14000, "main_table": "prescriptions", "rows_examined": 800000},
+      {"id": "q3", "sql": "SELECT * FROM clinical_notes WHERE patient_id=? ORDER BY created_at DESC LIMIT 20", "avg_ms": 22000, "main_table": "clinical_notes", "rows_examined": 1200000}
+    ],
+    "missing_index_hints": [
+      {"table": "appointments",  "columns": ["patient_id", "doctor_id", "appointment_date"], "reason": "Date range query + 2 foreign keys"},
+      {"table": "prescriptions", "columns": ["patient_id", "medication_code", "prescribed_at"], "reason": "Patient medication history"},
+      {"table": "clinical_notes","columns": ["patient_id", "created_at"], "reason": "Sorted history per patient on 1.2M rows"}
+    ],
+    "performance_score_baseline": 3.5,
+    "target_score": 68.0,
+    "max_steps": 50,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2", "inspect_query:q3",
+      "analyze_indexes:appointments", "analyze_indexes:clinical_notes",
+      "create_index:appointments:patient_id,doctor_id,appointment_date",
+      "create_index:prescriptions:patient_id,medication_code,prescribed_at",
+      "create_index:clinical_notes:patient_id,created_at",
+      "partition_table:clinical_notes",
+      "analyze_statistics:appointments",
+      "analyze_statistics:clinical_notes",
+      "submit_report"
+    ],
+    "category": "healthcare"
+  },
+  {
+    "id": "hard_s004",
+    "description": "Gaming leaderboard: 2M player records. Real-time ranking + history + match queries all degraded.",
+    "tables": [
+      {"name": "players",      "rows": 2000000, "indexes": ["PRIMARY"], "size_mb": 5000},
+      {"name": "matches",      "rows": 5000000, "indexes": ["PRIMARY"], "size_mb": 15000},
+      {"name": "leaderboards", "rows": 2000000, "indexes": ["PRIMARY"], "size_mb": 4000},
+      {"name": "achievements", "rows": 800000,  "indexes": ["PRIMARY"], "size_mb": 2000}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM leaderboards WHERE game_mode=? AND season=? ORDER BY score DESC LIMIT 100", "avg_ms": 30000, "main_table": "leaderboards", "rows_examined": 2000000},
+      {"id": "q2", "sql": "SELECT * FROM matches WHERE player_id=? AND game_mode=? AND played_at > ? ORDER BY played_at DESC", "avg_ms": 25000, "main_table": "matches", "rows_examined": 5000000},
+      {"id": "q3", "sql": "SELECT * FROM achievements WHERE player_id=? AND unlocked=1", "avg_ms": 12000, "main_table": "achievements", "rows_examined": 800000}
+    ],
+    "missing_index_hints": [
+      {"table": "leaderboards", "columns": ["game_mode", "season", "score"], "reason": "Sorted leaderboard by mode+season"},
+      {"table": "matches",      "columns": ["player_id", "game_mode", "played_at"], "reason": "Player history — 5M rows"},
+      {"table": "achievements", "columns": ["player_id", "unlocked"], "reason": "Unlocked achievements per player"}
+    ],
+    "performance_score_baseline": 2.8,
+    "target_score": 65.0,
+    "max_steps": 50,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2", "inspect_query:q3",
+      "analyze_indexes:leaderboards", "analyze_indexes:matches",
+      "create_index:leaderboards:game_mode,season,score",
+      "create_index:matches:player_id,game_mode,played_at",
+      "create_index:achievements:player_id,unlocked",
+      "partition_table:matches",
+      "analyze_statistics:leaderboards",
+      "analyze_statistics:matches",
+      "submit_report"
+    ],
+    "category": "gaming"
+  },
+  {
+    "id": "hard_s005",
+    "description": "Logistics platform: 6 tables, 3M shipment records. ETA queries, route optimization, and reporting all slow.",
+    "tables": [
+      {"name": "shipments",  "rows": 3000000, "indexes": ["PRIMARY"], "size_mb": 9000},
+      {"name": "routes",     "rows": 500000,  "indexes": ["PRIMARY"], "size_mb": 1500},
+      {"name": "drivers",    "rows": 100000,  "indexes": ["PRIMARY"], "size_mb": 200},
+      {"name": "vehicles",   "rows": 80000,   "indexes": ["PRIMARY"], "size_mb": 150},
+      {"name": "warehouses", "rows": 20000,   "indexes": ["PRIMARY"], "size_mb": 40},
+      {"name": "tracking",   "rows": 10000000,"indexes": ["PRIMARY"], "size_mb": 30000}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM shipments WHERE origin_warehouse=? AND status=? AND scheduled_at BETWEEN ? AND ?", "avg_ms": 28000, "main_table": "shipments", "rows_examined": 3000000},
+      {"id": "q2", "sql": "SELECT * FROM tracking WHERE shipment_id=? ORDER BY recorded_at DESC LIMIT 50", "avg_ms": 35000, "main_table": "tracking", "rows_examined": 10000000},
+      {"id": "q3", "sql": "SELECT d.*, COUNT(s.id) FROM drivers d, shipments s WHERE d.id = s.driver_id AND s.status='in_transit' GROUP BY d.id", "avg_ms": 20000, "main_table": "shipments", "rows_examined": 3000000}
+    ],
+    "missing_index_hints": [
+      {"table": "shipments", "columns": ["origin_warehouse", "status", "scheduled_at"], "reason": "3-column filter on 3M rows"},
+      {"table": "tracking",  "columns": ["shipment_id", "recorded_at"], "reason": "Lookup + sort on 10M row table — partition candidate"},
+      {"table": "shipments", "columns": ["driver_id", "status"], "reason": "JOIN + WHERE filter for driver stats"}
+    ],
+    "performance_score_baseline": 2.5,
+    "target_score": 65.0,
+    "max_steps": 50,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2", "inspect_query:q3",
+      "analyze_indexes:shipments", "analyze_indexes:tracking",
+      "create_index:shipments:origin_warehouse,status,scheduled_at",
+      "create_index:tracking:shipment_id,recorded_at",
+      "create_index:shipments:driver_id,status",
+      "rewrite_query:q3:SELECT d.id, d.name, COUNT(s.id) as active_shipments FROM drivers d INNER JOIN shipments s ON d.id = s.driver_id WHERE s.status='in_transit' GROUP BY d.id, d.name",
+      "partition_table:tracking",
+      "analyze_statistics:shipments",
+      "analyze_statistics:tracking",
+      "submit_report"
+    ],
+    "category": "logistics"
+  }
+]

dataset/medium_scenarios.json ADDED Viewed

	@@ -0,0 +1,137 @@

+[
+  {
+    "id": "medium_s001",
+    "description": "E-commerce DB: 50K orders + 8K users. Two slow queries. Composite indexes + statistics update needed.",
+    "tables": [
+      {"name": "orders", "rows": 50000, "indexes": ["PRIMARY"], "size_mb": 280},
+      {"name": "users",  "rows": 8000,  "indexes": ["PRIMARY", "email_idx"], "size_mb": 15}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM orders WHERE user_id=? AND status=?", "avg_ms": 8500, "main_table": "orders", "rows_examined": 50000},
+      {"id": "q2", "sql": "SELECT COUNT(*) FROM orders o JOIN users u ON o.user_id=u.id WHERE u.country=?", "avg_ms": 3200, "main_table": "orders", "rows_examined": 50000}
+    ],
+    "missing_index_hints": [
+      {"table": "orders", "columns": ["user_id", "status"], "reason": "Composite WHERE filter"},
+      {"table": "users",  "columns": ["country"], "reason": "JOIN + WHERE filter on country"}
+    ],
+    "performance_score_baseline": 12.5,
+    "target_score": 75.0,
+    "max_steps": 25,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2",
+      "analyze_indexes:orders", "analyze_indexes:users",
+      "create_index:orders:user_id,status",
+      "create_index:users:country",
+      "analyze_statistics:orders",
+      "submit_report"
+    ],
+    "category": "multi_table"
+  },
+  {
+    "id": "medium_s002",
+    "description": "Blog platform: 100K posts + 20K authors. Search and author lookup queries both slow.",
+    "tables": [
+      {"name": "posts",   "rows": 100000, "indexes": ["PRIMARY"], "size_mb": 450},
+      {"name": "authors", "rows": 20000,  "indexes": ["PRIMARY"], "size_mb": 40}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM posts WHERE author_id=? AND published=1 ORDER BY created_at DESC", "avg_ms": 6000, "main_table": "posts", "rows_examined": 100000},
+      {"id": "q2", "sql": "SELECT * FROM authors WHERE username=?", "avg_ms": 2100, "main_table": "authors", "rows_examined": 20000}
+    ],
+    "missing_index_hints": [
+      {"table": "posts",   "columns": ["author_id", "published", "created_at"], "reason": "Multi-column filter + ORDER BY"},
+      {"table": "authors", "columns": ["username"], "reason": "Unique lookup by username"}
+    ],
+    "performance_score_baseline": 9.0,
+    "target_score": 78.0,
+    "max_steps": 25,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2",
+      "create_index:posts:author_id,published,created_at",
+      "create_index:authors:username",
+      "submit_report"
+    ],
+    "category": "multi_table"
+  },
+  {
+    "id": "medium_s003",
+    "description": "Inventory system: 80K products + 200K stock movements. Two queries needing index + rewrite.",
+    "tables": [
+      {"name": "products",        "rows": 80000,  "indexes": ["PRIMARY"], "size_mb": 200},
+      {"name": "stock_movements", "rows": 200000, "indexes": ["PRIMARY"], "size_mb": 600}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM stock_movements WHERE product_id=? AND movement_type=? AND created_at > ?", "avg_ms": 9000, "main_table": "stock_movements", "rows_examined": 200000},
+      {"id": "q2", "sql": "SELECT p.*, SUM(sm.quantity) FROM products p, stock_movements sm WHERE p.id = sm.product_id GROUP BY p.id", "avg_ms": 12000, "main_table": "products", "rows_examined": 200000}
+    ],
+    "missing_index_hints": [
+      {"table": "stock_movements", "columns": ["product_id", "movement_type", "created_at"], "reason": "Composite filter on 3 columns"},
+      {"table": "products", "columns": ["id"], "reason": "JOIN column — rewrite implicit JOIN to INNER JOIN"}
+    ],
+    "performance_score_baseline": 6.5,
+    "target_score": 72.0,
+    "max_steps": 30,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2",
+      "create_index:stock_movements:product_id,movement_type,created_at",
+      "rewrite_query:q2:SELECT p.id, p.name, SUM(sm.quantity) FROM products p INNER JOIN stock_movements sm ON p.id = sm.product_id GROUP BY p.id",
+      "analyze_statistics:stock_movements",
+      "submit_report"
+    ],
+    "category": "rewrite_and_index"
+  },
+  {
+    "id": "medium_s004",
+    "description": "Ticketing system: 60K tickets + 5K agents. Status queue and agent workload queries are slow.",
+    "tables": [
+      {"name": "tickets", "rows": 60000, "indexes": ["PRIMARY"], "size_mb": 180},
+      {"name": "agents",  "rows": 5000,  "indexes": ["PRIMARY"], "size_mb": 8}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM tickets WHERE status=? AND priority=? ORDER BY created_at ASC", "avg_ms": 5500, "main_table": "tickets", "rows_examined": 60000},
+      {"id": "q2", "sql": "SELECT agent_id, COUNT(*) as open_count FROM tickets WHERE status='open' GROUP BY agent_id", "avg_ms": 4200, "main_table": "tickets", "rows_examined": 60000}
+    ],
+    "missing_index_hints": [
+      {"table": "tickets", "columns": ["status", "priority", "created_at"], "reason": "Three-column filter with ORDER BY"},
+      {"table": "tickets", "columns": ["status", "agent_id"], "reason": "GROUP BY + WHERE filter"}
+    ],
+    "performance_score_baseline": 11.0,
+    "target_score": 76.0,
+    "max_steps": 25,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2",
+      "analyze_indexes:tickets",
+      "create_index:tickets:status,priority,created_at",
+      "create_index:tickets:status,agent_id",
+      "submit_report"
+    ],
+    "category": "multi_index"
+  },
+  {
+    "id": "medium_s005",
+    "description": "Analytics DB: 150K events + 10K users. Event funnel query and user lookup both need optimization.",
+    "tables": [
+      {"name": "events", "rows": 150000, "indexes": ["PRIMARY"], "size_mb": 700},
+      {"name": "users",  "rows": 10000,  "indexes": ["PRIMARY"], "size_mb": 20}
+    ],
+    "slow_queries": [
+      {"id": "q1", "sql": "SELECT * FROM events WHERE user_id=? AND event_type=? AND occurred_at BETWEEN ? AND ?", "avg_ms": 11000, "main_table": "events", "rows_examined": 150000},
+      {"id": "q2", "sql": "SELECT * FROM users WHERE signup_source=? AND created_at > ?", "avg_ms": 3000, "main_table": "users", "rows_examined": 10000}
+    ],
+    "missing_index_hints": [
+      {"table": "events", "columns": ["user_id", "event_type", "occurred_at"], "reason": "Range query on 3 columns"},
+      {"table": "users",  "columns": ["signup_source", "created_at"], "reason": "Composite filter on signup data"}
+    ],
+    "performance_score_baseline": 5.5,
+    "target_score": 74.0,
+    "max_steps": 30,
+    "optimal_actions": [
+      "inspect_query:q1", "inspect_query:q2",
+      "create_index:events:user_id,event_type,occurred_at",
+      "create_index:users:signup_source,created_at",
+      "analyze_statistics:events",
+      "submit_report"
+    ],
+    "category": "analytics"
+  }
+]

env/__pycache__/models.cpython-312.pyc CHANGED Viewed

Binary files a/env/__pycache__/models.cpython-312.pyc and b/env/__pycache__/models.cpython-312.pyc differ

env/environment.py CHANGED Viewed

@@ -1,3 +1,9 @@
 import time
 import random
 from typing import Optional
@@ -9,29 +15,28 @@ from env.models import (
 )
 from env.tasks import task_manager
 from env.reward import compute_reward, is_done, MAX_STEPS
 class SQLDebuggerEnvironment:
     """
-    OpenEnv-compliant SQL Query Debugger Environment.
-    Implements the 3 required methods:
-        reset()  → Observation
-        step()   → (Observation, Reward, done, info)
-        state()  → EpisodeState
-    Design principles:
-    - Dense reward signal at every step
-    - No state leakage between episodes
-    - Graceful handling of all edge cases
-    - Deterministic grading
-    - Thread-safe episode state
     """
     def __init__(self):
-        self._state        = EpisodeState()
-        self._current_task = None
-        self._started_at   = None
     # ─────────────────────────────────────────────
     #  reset() → Observation
@@ -39,14 +44,8 @@ class SQLDebuggerEnvironment:
     def reset(self, difficulty: Optional[str] = None, task_id: Optional[str] = None) -> Observation:
         """
-        Starts a fresh episode. Clears ALL state from previous episode.
-        Loads a new task from the dataset.
-        Returns the initial Observation the agent sees.
-        Edge cases handled:
-        - reset() called mid-episode → cleanly resets, no state leakage
-        - invalid difficulty → defaults to random
-        - dataset empty → raises ValueError with clear message
         """
         # ── Resolve difficulty ────────────────────────────────────
@@ -54,7 +53,6 @@ class SQLDebuggerEnvironment:
             try:
                 diff_enum = DifficultyLevel(difficulty.lower())
             except ValueError:
-                # Invalid difficulty — pick random
                 diff_enum = random.choice(list(DifficultyLevel))
         else:
             diff_enum = random.choice(list(DifficultyLevel))
@@ -65,7 +63,18 @@ class SQLDebuggerEnvironment:
         except Exception as e:
             raise ValueError(f"Failed to load task: {str(e)}")
-        # ── Reset ALL state — no leakage ──────────────────────────
         self._current_task = task
         self._started_at   = time.time()
         self._state        = EpisodeState(
@@ -76,147 +85,194 @@ class SQLDebuggerEnvironment:
             done             = False,
             hints_used       = 0,
             previous_actions = [],
-            action_counts    = {},
             started_at       = self._started_at,
             last_reward      = 0.0,
             initialized      = True,
         )
-        # ── Build initial observation ─────────────────────────────
-        context = task_manager.build_observation_context(task)
-        return Observation(
-            task_id          = task["id"],
-            task_description = task["description"],
-            current_context  = context,
-            step_count       = 0,
-            difficulty       = diff_enum,
-            max_steps        = MAX_STEPS,
-            hints_used       = 0,
-            previous_actions = [],
-            metadata         = {
-                "category":        task.get("category", ""),
-                "estimated_steps": task.get("estimated_fix_steps", 5),
-                "started_at":      self._started_at,
-            }
-        )
     # ─────────────────────────────────────────────
-    #  step() → (Observation, Reward, done, info)
     # ─────────────────────────────────────────────
     def step(self, action: Optional[Action]) -> StepResponse:
         """
-        Accepts an Action, processes it, updates state,
-        computes dense reward, returns next Observation.
-        Edge cases handled:
-        - step() called before reset() → auto-resets
-        - null action → reward=-0.1, done=False, never crash
-        - malformed action payload → catches ValidationError
-        - agent loops (same action 3+ times) → loop penalty
-        - episode already done → returns terminal observation
-        - max steps reached → forces done=True
-        - extremely long payload → truncated in models.py
         """
         # ── Auto-reset if not initialized ────────────────────────
         if not self._state.initialized or self._current_task is None:
             obs = self.reset()
             return StepResponse(
-                observation=obs,
-                reward=Reward(score=0.5, breakdown={"auto_reset": True}, feedback="Environment auto-reset."),
-                done=False,
-                info={"auto_reset": True}
             )
         # ── Episode already done ──────────────────────────────────
         if self._state.done:
             obs = self._build_observation()
             return StepResponse(
-                observation=obs,
-                reward=Reward(score=0.5, breakdown={"episode_done": True}, feedback="Episode already finished. Call reset()."),
-                done=True,
-                info={"episode_done": True, "total_reward": self._state.total_reward}
             )
-        # ── Handle null / invalid action ─────────────────────────
         if action is None or action.payload is None:
             self._state.step_count += 1
-            obs = self._build_observation()
-            reward = Reward(
-                score=0.001,
-                breakdown={"invalid_action": 0.001},
-                feedback="Null or invalid action received."
-            )
-            self._state.last_reward   = -0.1
-            self._state.total_reward  = round(self._state.total_reward - 0.1, 4)
-            done = self._state.step_count >= MAX_STEPS
             self._state.done = done
             return StepResponse(observation=obs, reward=reward, done=done, info={"error": "null_action"})
-        # ── Validate action type ──────────────────────────────────
-        try:
-            action_type_val = action.action_type.value if hasattr(action.action_type, "value") else str(action.action_type)
-        except Exception:
-            action_type_val = "unknown"
         # ── Update step count ───────────────────────────���─────────
         self._state.step_count += 1
         self._state.previous_actions.append(action_type_val)
-        self._state.action_counts[action_type_val] = self._state.action_counts.get(action_type_val, 0) + 1
-        # ── Track hints ───────────────────────────────────────────
-        if action.action_type == ActionType.REQUEST_HINT:
             self._state.hints_used += 1
-            # Inject hint into next observation context
             hint_text = task_manager.get_hint(self._current_task, self._state.hints_used)
             self._current_task["_last_hint"] = hint_text
-        # ── Compute dense reward ──────────────────────────────────
         reward = compute_reward(
-            action           = action,
-            task_id          = self._state.task_id,
-            difficulty       = self._state.difficulty,
-            step_count       = self._state.step_count,
-            previous_actions = self._state.previous_actions[:-1],  # exclude current
-            hints_used       = self._state.hints_used,
-            estimated_steps  = self._current_task.get("estimated_fix_steps", 5),
-            action_counts    = self._state.action_counts,
         )
         # ── Update cumulative reward ──────────────────────────────
         self._state.last_reward  = reward.score
         self._state.total_reward = round(self._state.total_reward + reward.score, 4)
-        # ── Check done condition ──────────────────────────────────
         done = is_done(
-            action_type  = action.action_type,
-            step_count   = self._state.step_count,
-            grader_score = reward.breakdown.get("grader_score", 0.0),
         )
         self._state.done = done
-        # ── Build next observation ────────────────────────────────
         obs = self._build_observation()
-        # ── Build info dict ───────────────────────────────────────
         info = {
-            "step_count":    self._state.step_count,
-            "total_reward":  self._state.total_reward,
-            "hints_used":    self._state.hints_used,
-            "action_counts": self._state.action_counts,
-            "task_id":       self._state.task_id,
-            "difficulty":    self._state.difficulty.value if self._state.difficulty else None,
         }
         if done:
             info["episode_summary"] = {
-                "total_steps":  self._state.step_count,
-                "total_reward": self._state.total_reward,
-                "hints_used":   self._state.hints_used,
-                "duration_sec": round(time.time() - (self._started_at or time.time()), 2),
             }
-        # Normalize reward to strictly (0, 1) exclusive for validator compliance
         normalized_score = max(0.001, min(0.999, (reward.score + 1.0) / 2.0))
         reward = Reward(
             score=normalized_score,
@@ -231,13 +287,6 @@ class SQLDebuggerEnvironment:
     # ─────────────────────────────────────────────
     def state(self) -> EpisodeState:
-        """
-        Returns the full current state at any point.
-        Must be JSON-serializable. Must always reflect latest step.
-        Edge case: state() called before reset() → returns default empty state.
-        Never crashes.
-        """
         return self._state
     # ─────────────────────────────────────────────
@@ -245,13 +294,9 @@ class SQLDebuggerEnvironment:
     # ─────────────────────────────────────────────
     def _build_observation(self) -> Observation:
-        """
-        Builds the current Observation from internal state.
-        Injects hint into context if one was just requested.
-        CRITICAL: Never leaks fixed_query (ground truth) to agent.
-        """
         if self._current_task is None:
-            # Fallback safe observation
             return Observation(
                 task_id          = "none",
                 task_description = "No task loaded. Call reset() first.",
@@ -264,14 +309,33 @@ class SQLDebuggerEnvironment:
                 metadata         = {}
             )
         context = task_manager.build_observation_context(self._current_task)
-        # Inject hint if available
         if "_last_hint" in self._current_task:
             context["last_hint"] = self._current_task["_last_hint"]
-        # Add step progress info
-        context["steps_remaining"] = MAX_STEPS - self._state.step_count
         context["total_reward_so_far"] = self._state.total_reward
         return Observation(
@@ -284,10 +348,11 @@ class SQLDebuggerEnvironment:
             hints_used       = self._state.hints_used,
             previous_actions = self._state.previous_actions.copy(),
             metadata         = {
-                "category":        self._current_task.get("category", ""),
-                "estimated_steps": self._current_task.get("estimated_fix_steps", 5),
-                "total_reward":    self._state.total_reward,
-                "action_counts":   self._state.action_counts,
             }
         )
@@ -296,4 +361,4 @@ class SQLDebuggerEnvironment:
 #  SINGLETON INSTANCE (used by FastAPI)
 # ─────────────────────────────────────────────
-environment = SQLDebuggerEnvironment()

+"""
+env/environment.py — SQL Database Engineer Agent (SDEA)
+Round 2: Long-horizon DB optimization environment.
+Agent manages a simulated production database over 50 steps.
+"""
 import time
 import random
 from typing import Optional
 )
 from env.tasks import task_manager
 from env.reward import compute_reward, is_done, MAX_STEPS
+from env.db_simulator import DatabaseSimulator
 class SQLDebuggerEnvironment:
     """
+    OpenEnv-compliant SQL Database Engineer Agent Environment.
+    Round 2 evolution:
+    - 50-step long-horizon episodes (up from 20)
+    - 10 action types including DB-specific actions
+    - DatabaseSimulator tracks real performance score 0-100
+    - Milestone bonuses at 25%/50%/75% improvement
+    - Backward compatible with Round 1 actions
     """
     def __init__(self):
+        self._state             = EpisodeState()
+        self._current_task      = None
+        self._started_at        = None
+        self._db_sim: Optional[DatabaseSimulator] = None
+        self._milestones_earned: set  = set()
+        self._baseline_score:   float = 0.0
     # ─────────────────────────────────────────────
     #  reset() → Observation
     def reset(self, difficulty: Optional[str] = None, task_id: Optional[str] = None) -> Observation:
         """
+        Starts a fresh episode. Clears ALL state.
+        Loads scenario and initializes DatabaseSimulator.
         """
         # ── Resolve difficulty ────────────────────────────────────
             try:
                 diff_enum = DifficultyLevel(difficulty.lower())
             except ValueError:
                 diff_enum = random.choice(list(DifficultyLevel))
         else:
             diff_enum = random.choice(list(DifficultyLevel))
         except Exception as e:
             raise ValueError(f"Failed to load task: {str(e)}")
+        # ── Initialize DatabaseSimulator ──────────────────────────
+        # Only initialize for Round 2 scenarios (have 'tables' key)
+        if "tables" in task and "slow_queries" in task:
+            self._db_sim         = DatabaseSimulator(task)
+            self._baseline_score = self._db_sim.get_performance_score()
+        else:
+                # Round 1 task — no DB simulator needed
+            self._db_sim         = None
+            self._baseline_score = 0.0
+            self._milestones_earned = set()
+        # ── Reset episode state ───────────────────────────────────
         self._current_task = task
         self._started_at   = time.time()
         self._state        = EpisodeState(
             done             = False,
             hints_used       = 0,
             previous_actions = [],
+            action_counts    = {
+                "_baseline_score": self._baseline_score,
+                "_target_score":   task.get("target_score", 85.0),
+                "_milestones":     [],
+                "_perf_history":   [self._baseline_score],
+                "_best_score":     self._baseline_score,
+            },
             started_at       = self._started_at,
             last_reward      = 0.0,
             initialized      = True,
         )
+        return self._build_observation()
     # ─────────────────────────────────────────────
+    #  step() → StepResponse
     # ─────────────────────────────────────────────
     def step(self, action: Optional[Action]) -> StepResponse:
         """
+        Processes an action, updates DB simulator, computes reward.
+        Handles all Round 2 DB engineering actions.
         """
         # ── Auto-reset if not initialized ────────────────────────
         if not self._state.initialized or self._current_task is None:
             obs = self.reset()
             return StepResponse(
+                observation = obs,
+                reward      = Reward(score=0.5, breakdown={"auto_reset": True}, feedback="Environment auto-reset."),
+                done        = False,
+                info        = {"auto_reset": True}
             )
         # ── Episode already done ──────────────────────────────────
         if self._state.done:
             obs = self._build_observation()
             return StepResponse(
+                observation = obs,
+                reward      = Reward(score=0.5, breakdown={"episode_done": True}, feedback="Episode finished. Call reset()."),
+                done        = True,
+                info        = {"episode_done": True, "total_reward": self._state.total_reward}
             )
+        # ── Handle null action ────────────────────────────────────
         if action is None or action.payload is None:
             self._state.step_count += 1
+            obs    = self._build_observation()
+            reward = Reward(score=0.001, breakdown={"invalid_action": 0.001}, feedback="Null action.")
+            done   = self._state.step_count >= MAX_STEPS
             self._state.done = done
             return StepResponse(observation=obs, reward=reward, done=done, info={"error": "null_action"})
+        action_type_val  = action.action_type.value if hasattr(action.action_type, "value") else str(action.action_type)
+        action_type_enum = action.action_type
         # ── Update step count ───────────────────────────���─────────
         self._state.step_count += 1
         self._state.previous_actions.append(action_type_val)
+        self._state.action_counts[action_type_val] = \
+            self._state.action_counts.get(action_type_val, 0) + 1
+        # ── Handle hints ──────────────────────────────────────────
+        if action_type_enum == ActionType.REQUEST_HINT:
             self._state.hints_used += 1
             hint_text = task_manager.get_hint(self._current_task, self._state.hints_used)
             self._current_task["_last_hint"] = hint_text
+        # ── Apply DB action and get delta ─────────────────────────
+        db_delta      = 0.0
+        current_score = self._baseline_score
+        action_info   = {}
+        if self._db_sim is not None:
+            payload = action.payload or {}
+            if action_type_enum == ActionType.INSPECT_QUERY:
+                qid         = payload.get("query_id", "q1")
+                action_info = self._db_sim.inspect_query(qid)
+                self._current_task["_last_inspect"] = action_info
+                # No score change — investigation action
+            elif action_type_enum == ActionType.ANALYZE_INDEXES:
+                table       = payload.get("table", "")
+                action_info = self._db_sim.analyze_indexes(table)
+                self._current_task["_last_analysis"] = action_info
+            elif action_type_enum == ActionType.CREATE_INDEX:
+                result      = self._db_sim.apply_action("create_index", payload)
+                db_delta    = result["delta"]
+                action_info = result
+            elif action_type_enum == ActionType.REWRITE_QUERY:
+                result      = self._db_sim.apply_action("rewrite_query", payload)
+                db_delta    = result["delta"]
+                action_info = result
+            elif action_type_enum == ActionType.ADD_COLUMN:
+                result      = self._db_sim.apply_action("add_column", payload)
+                db_delta    = result["delta"]
+                action_info = result
+            elif action_type_enum == ActionType.DROP_INDEX:
+                result      = self._db_sim.apply_action("drop_index", payload)
+                db_delta    = result["delta"]
+                action_info = result
+            elif action_type_enum == ActionType.PARTITION_TABLE:
+                result      = self._db_sim.apply_action("partition_table", payload)
+                db_delta    = result["delta"]
+                action_info = result
+            elif action_type_enum == ActionType.ANALYZE_STATS:
+                result      = self._db_sim.apply_action("analyze_statistics", payload)
+                db_delta    = result["delta"]
+                action_info = result
+            current_score = self._db_sim.get_performance_score()
+            # Update tracking in action_counts dict (used by /progress)
+            perf_history = self._state.action_counts.get("_perf_history", [])
+            perf_history.append(current_score)
+            self._state.action_counts["_perf_history"] = perf_history
+            self._state.action_counts["_best_score"]   = self._db_sim.best_score
+        # ── Compute reward ────────────────────────────────────────
         reward = compute_reward(
+            action            = action,
+            task_id           = self._state.task_id,
+            difficulty        = self._state.difficulty,
+            step_count        = self._state.step_count,
+            previous_actions  = self._state.previous_actions[:-1],
+            hints_used        = self._state.hints_used,
+            estimated_steps   = self._current_task.get("estimated_fix_steps", MAX_STEPS),
+            action_counts     = self._state.action_counts,
+            db_delta          = db_delta,
+            baseline_score    = self._baseline_score,
+            current_score     = current_score,
+            milestones_earned = self._milestones_earned,
         )
+        # Update milestone tracking
+        self._state.action_counts["_milestones"] = list(self._milestones_earned)
         # ── Update cumulative reward ──────────────────────────────
         self._state.last_reward  = reward.score
         self._state.total_reward = round(self._state.total_reward + reward.score, 4)
+        # ── Check done ────────────────────────────────────────────
+        target_reached = (
+            self._db_sim.is_target_reached() if self._db_sim else False
+        )
         done = is_done(
+            action_type     = action_type_enum,
+            step_count      = self._state.step_count,
+            grader_score    = reward.breakdown.get("grader_score", 0.0),
+            target_reached  = target_reached,
         )
         self._state.done = done
+        # ── Build observation ─────────────────────────────────────
         obs = self._build_observation()
+        # ── Info dict ─────────────────────────────────────────────
         info = {
+            "step_count":       self._state.step_count,
+            "total_reward":     self._state.total_reward,
+            "hints_used":       self._state.hints_used,
+            "task_id":          self._state.task_id,
+            "difficulty":       self._state.difficulty.value if self._state.difficulty else None,
+            "performance_score": current_score,
+            "db_delta":         db_delta,
+            "milestones":       list(self._milestones_earned),
+            "action_result":    action_info,
         }
         if done:
             info["episode_summary"] = {
+                "total_steps":       self._state.step_count,
+                "total_reward":      self._state.total_reward,
+                "hints_used":        self._state.hints_used,
+                "duration_sec":      round(time.time() - (self._started_at or time.time()), 2),
+                "final_score":       current_score,
+                "baseline_score":    self._baseline_score,
+                "improvement":       round(current_score - self._baseline_score, 2),
+                "milestones_earned": list(self._milestones_earned),
             }
+        # Normalize reward for validator compliance
         normalized_score = max(0.001, min(0.999, (reward.score + 1.0) / 2.0))
         reward = Reward(
             score=normalized_score,
     # ─────────────────────────────────────────────
     def state(self) -> EpisodeState:
         return self._state
     # ─────────────────────────────────────────────
     # ─────────────────────────────────────────────
     def _build_observation(self) -> Observation:
+        """Builds Observation from current state + DB simulator state."""
         if self._current_task is None:
             return Observation(
                 task_id          = "none",
                 task_description = "No task loaded. Call reset() first.",
                 metadata         = {}
             )
+        # Base context from task
         context = task_manager.build_observation_context(self._current_task)
+        # Inject DB simulator state
+        if self._db_sim is not None:
+            db_state = self._db_sim.get_current_state()
+            context.update({
+                "performance_score":   db_state["performance_score"],
+                "target_score":        db_state["target_score"],
+                "baseline_score":      db_state["baseline_score"],
+                "tables":              db_state["tables"],
+                "slow_queries":        db_state["slow_queries"],
+                "indexes":             db_state["indexes"],
+                "improvement_history": db_state["history"],
+                "best_score":          db_state["best_score"],
+                "milestones_earned":   list(self._milestones_earned),
+            })
+        # Inject last action result if available
+        if "_last_inspect" in self._current_task:
+            context["last_inspect_result"] = self._current_task["_last_inspect"]
+        if "_last_analysis" in self._current_task:
+            context["last_analysis_result"] = self._current_task["_last_analysis"]
         if "_last_hint" in self._current_task:
             context["last_hint"] = self._current_task["_last_hint"]
+        context["steps_remaining"]    = MAX_STEPS - self._state.step_count
         context["total_reward_so_far"] = self._state.total_reward
         return Observation(
             hints_used       = self._state.hints_used,
             previous_actions = self._state.previous_actions.copy(),
             metadata         = {
+                "category":         self._current_task.get("category", ""),
+                "baseline_score":   self._baseline_score,
+                "target_score":     self._current_task.get("target_score", 85.0),
+                "total_reward":     self._state.total_reward,
+                "milestones":       list(self._milestones_earned),
             }
         )
 #  SINGLETON INSTANCE (used by FastAPI)
 # ─────────────────────────────────────────────
+environment = SQLDebuggerEnvironment()

env/models.py CHANGED Viewed

@@ -4,7 +4,9 @@ from enum import Enum
 import time
 #  ENUMS
 class DifficultyLevel(str, Enum):
     EASY   = "easy"
@@ -13,42 +15,57 @@ class DifficultyLevel(str, Enum):
 class ActionType(str, Enum):
-    IDENTIFY_ERROR    = "identify_error"
-    PROPOSE_FIX       = "propose_fix"
-    SUBMIT_ANSWER     = "submit_answer"
-    REQUEST_HINT      = "request_hint"
-    EXPLAIN_ISSUE     = "explain_issue"
-    OPTIMIZE_QUERY    = "optimize_query"
-# CORE MODELS
 class Observation(BaseModel):
-    task_id:          str            = Field(..., description="Unique task identifier")
-    task_description: str            = Field(..., description="What the agent must do")
-    current_context:  dict           = Field(..., description="What the agent currently sees")
-    step_count:       int            = Field(default=0, ge=0, description="Steps taken so far")
     difficulty:       DifficultyLevel = Field(..., description="Task difficulty level")
-    max_steps:        int            = Field(default=20, description="Maximum steps allowed")
-    hints_used:       int            = Field(default=0, description="Number of hints used")
-    previous_actions: list[str]      = Field(default_factory=list, description="History of action types taken")
-    metadata:         dict           = Field(default_factory=dict, description="Extra task metadata")
     model_config = {"json_schema_extra": {
         "example": {
-            "task_id": "easy_001",
-            "task_description": "Fix the SQL syntax error in the query below.",
             "current_context": {
-                "buggy_query": "SELECT id, name FROM users WHERE id = 1 AND",
-                "error_message": "SyntaxError: unexpected end of input",
-                "database_schema": "users(id INT, name VARCHAR, email VARCHAR)"
             },
             "step_count": 0,
             "difficulty": "easy",
-            "max_steps": 20,
             "hints_used": 0,
             "previous_actions": [],
-            "metadata": {"category": "syntax", "estimated_fix_steps": 2}
         }
     }}
@@ -67,7 +84,6 @@ class Action(BaseModel):
     @field_validator("payload")
     @classmethod
     def truncate_long_strings(cls, v):
-        # Edge case: extremely long agent output — truncate gracefully
         def truncate(obj, max_len=5000):
             if isinstance(obj, str) and len(obj) > max_len:
                 return obj[:max_len] + "...[truncated]"
@@ -78,12 +94,10 @@ class Action(BaseModel):
     model_config = {"json_schema_extra": {
         "example": {
-            "action_type": "submit_answer",
             "payload": {
-                "fixed_query":   "SELECT id, name FROM users WHERE id = 1",
-                "explanation":   "Removed the trailing AND which caused a syntax error",
-                "error_type":    "syntax",
-                "confidence":    0.95
             }
         }
     }}
@@ -103,49 +117,53 @@ class Reward(BaseModel):
         "example": {
             "score": 0.75,
             "breakdown": {
-                "correct_answer":  0.5,
-                "explanation":     0.2,
-                "confidence":      0.05,
-                "step_efficiency": 0.0
             },
-            "feedback": "Correct fix applied. Good explanation provided. Minor efficiency penalty."
         }
     }}
 #  EPISODE STATE (used by state() endpoint)
 class EpisodeState(BaseModel):
-    task_id:          Optional[str]            = Field(default=None)
     difficulty:       Optional[DifficultyLevel] = Field(default=None)
     step_count:       int                       = Field(default=0)
     total_reward:     float                     = Field(default=0.0)
     done:             bool                      = Field(default=False)
     hints_used:       int                       = Field(default=0)
     previous_actions: list[str]                 = Field(default_factory=list)
-    action_counts:    dict[str, int]            = Field(default_factory=dict)
     started_at:       Optional[float]           = Field(default=None)
     last_reward:      float                     = Field(default=0.0)
     initialized:      bool                      = Field(default=False)
     model_config = {"json_schema_extra": {
         "example": {
-            "task_id":          "medium_002",
-            "difficulty":       "medium",
             "step_count":       3,
-            "total_reward":     0.45,
             "done":             False,
-            "hints_used":       1,
-            "previous_actions": ["identify_error", "request_hint", "propose_fix"],
-            "action_counts":    {"identify_error": 1, "request_hint": 1, "propose_fix": 1},
             "started_at":       1700000000.0,
-            "last_reward":      0.25,
             "initialized":      True
         }
     }}
 #  API REQUEST / RESPONSE WRAPPERS
 class StepResponse(BaseModel):
     observation: Observation
@@ -157,15 +175,15 @@ class ResetResponse(BaseModel):
     observation: Observation
 class TaskInfo(BaseModel):
-    id:           str
-    difficulty:   DifficultyLevel
-    description:  str
-    action_schema: dict   # REQUIRED by validator — field definitions not just names
 class TaskListResponse(BaseModel):
-    tasks:         list[TaskInfo]
-    total:         int
-    action_types:  list[str]
 class BaselineResult(BaseModel):
     task_id:    str
@@ -180,7 +198,7 @@ class BaselineResult(BaseModel):
         return max(0.001, min(0.999, round(float(v), 4)))
 class BaselineResponse(BaseModel):
-    results:      list[BaselineResult]
     average_score: float
     completed_at:  float = Field(default_factory=time.time)
@@ -201,13 +219,30 @@ class GraderResponse(BaseModel):
     model_config = {"json_schema_extra": {
         "example": {
-            "score": 0.75,
-            "feedback": "Correct fix applied.",
-            "breakdown": {"fix_correctness": 0.5, "explanation": 0.15, "confidence": 0.05}
         }
     }}
 class HealthResponse(BaseModel):
-    status:  str = "ok"
-    version: str = "1.0.0"
-    uptime:  float = Field(default_factory=time.time)

 import time
+# ─────────────────────────────────────────────
 #  ENUMS
+# ─────────────────────────────────────────────
 class DifficultyLevel(str, Enum):
     EASY   = "easy"
 class ActionType(str, Enum):
+    # ── Round 1 actions (keep — backward compatible) ──
+    IDENTIFY_ERROR  = "identify_error"
+    PROPOSE_FIX     = "propose_fix"
+    SUBMIT_ANSWER   = "submit_answer"
+    REQUEST_HINT    = "request_hint"
+    EXPLAIN_ISSUE   = "explain_issue"
+    OPTIMIZE_QUERY  = "optimize_query"
+    # ── Round 2 new actions ──
+    INSPECT_QUERY    = "inspect_query"
+    ANALYZE_INDEXES  = "analyze_indexes"
+    CREATE_INDEX     = "create_index"
+    REWRITE_QUERY    = "rewrite_query"
+    ADD_COLUMN       = "add_column"
+    DROP_INDEX       = "drop_index"
+    PARTITION_TABLE  = "partition_table"
+    ANALYZE_STATS    = "analyze_statistics"
+    SUBMIT_REPORT    = "submit_report"
+# ─────────────────────────────────────────────
+#  CORE MODELS
+# ─────────────────────────────────────────────
 class Observation(BaseModel):
+    task_id:          str             = Field(..., description="Unique task identifier")
+    task_description: str             = Field(..., description="What the agent must do")
+    current_context:  dict            = Field(..., description="What the agent currently sees")
+    step_count:       int             = Field(default=0, ge=0, description="Steps taken so far")
     difficulty:       DifficultyLevel = Field(..., description="Task difficulty level")
+    max_steps:        int             = Field(default=50, description="Maximum steps allowed")
+    hints_used:       int             = Field(default=0, description="Number of hints used")
+    previous_actions: list[str]       = Field(default_factory=list, description="History of action types taken")
+    metadata:         dict            = Field(default_factory=dict, description="Extra task metadata")
     model_config = {"json_schema_extra": {
         "example": {
+            "task_id": "easy_s001",
+            "task_description": "Optimize a slow user lookup query on 10K users table.",
             "current_context": {
+                "tables": [{"name": "users", "rows": 10000, "indexes": ["PRIMARY"]}],
+                "slow_queries": [{"id": "q1", "sql": "SELECT * FROM users WHERE email=?", "avg_ms": 2000}],
+                "performance_score": 8.0,
+                "target_score": 80.0
             },
             "step_count": 0,
             "difficulty": "easy",
+            "max_steps": 50,
             "hints_used": 0,
             "previous_actions": [],
+            "metadata": {"scenario_id": "easy_s001", "baseline_score": 8.0}
         }
     }}
     @field_validator("payload")
     @classmethod
     def truncate_long_strings(cls, v):
         def truncate(obj, max_len=5000):
             if isinstance(obj, str) and len(obj) > max_len:
                 return obj[:max_len] + "...[truncated]"
     model_config = {"json_schema_extra": {
         "example": {
+            "action_type": "create_index",
             "payload": {
+                "table":   "users",
+                "columns": ["email"]
             }
         }
     }}
         "example": {
             "score": 0.75,
             "breakdown": {
+                "step_reward":    0.05,
+                "delta_reward":   0.40,
+                "milestone_bonus": 0.15,
+                "total":          0.60
             },
+            "feedback": "Index created. Performance improved 55%. Milestone bonus earned!"
         }
     }}
+# ─────────────────────────────────────────────
 #  EPISODE STATE (used by state() endpoint)
+# ─────────────────────────────────────────────
 class EpisodeState(BaseModel):
+    task_id:          Optional[str]             = Field(default=None)
     difficulty:       Optional[DifficultyLevel] = Field(default=None)
     step_count:       int                       = Field(default=0)
     total_reward:     float                     = Field(default=0.0)
     done:             bool                      = Field(default=False)
     hints_used:       int                       = Field(default=0)
     previous_actions: list[str]                 = Field(default_factory=list)
+    action_counts:    dict[str, Any]            = Field(default_factory=dict)
     started_at:       Optional[float]           = Field(default=None)
     last_reward:      float                     = Field(default=0.0)
     initialized:      bool                      = Field(default=False)
     model_config = {"json_schema_extra": {
         "example": {
+            "task_id":          "easy_s001",
+            "difficulty":       "easy",
             "step_count":       3,
+            "total_reward":     0.65,
             "done":             False,
+            "hints_used":       0,
+            "previous_actions": ["inspect_query", "analyze_indexes", "create_index"],
+            "action_counts":    {"inspect_query": 1, "analyze_indexes": 1, "create_index": 1},
             "started_at":       1700000000.0,
+            "last_reward":      0.45,
             "initialized":      True
         }
     }}
+# ─────────────────────────────────────────────
 #  API REQUEST / RESPONSE WRAPPERS
+# ─────────────────────────────────────────────
 class StepResponse(BaseModel):
     observation: Observation
     observation: Observation
 class TaskInfo(BaseModel):
+    id:            str
+    difficulty:    DifficultyLevel
+    description:   str
+    action_schema: dict
 class TaskListResponse(BaseModel):
+    tasks:        list[TaskInfo]
+    total:        int
+    action_types: list[str]
 class BaselineResult(BaseModel):
     task_id:    str
         return max(0.001, min(0.999, round(float(v), 4)))
 class BaselineResponse(BaseModel):
+    results:       list[BaselineResult]
     average_score: float
     completed_at:  float = Field(default_factory=time.time)
     model_config = {"json_schema_extra": {
         "example": {
+            "score":    0.82,
+            "feedback": "Performance improved from 12.5 to 85.0. Excellent optimization!",
+            "breakdown": {"perf_improvement": 0.60, "step_efficiency": 0.12, "index_quality": 0.10}
         }
     }}
 class HealthResponse(BaseModel):
+    status:  str   = "ok"
+    version: str   = "2.0.0"
+    uptime:  float = Field(default_factory=time.time)
+# ─────────────────────────────────────────────
+#  ROUND 2 — PROGRESS RESPONSE
+# ─────────────────────────────────────────────
+class ProgressResponse(BaseModel):
+    scenario_id:         Optional[str]  = Field(default=None)
+    performance_score:   float          = Field(default=0.0, description="Current DB performance score 0-100")
+    baseline_score:      float          = Field(default=0.0, description="Starting score this episode")
+    target_score:        float          = Field(default=85.0, description="Score needed to succeed")
+    improvement_history: list[float]    = Field(default_factory=list)
+    milestones_earned:   list[float]    = Field(default_factory=list)
+    best_score:          float          = Field(default=0.0)
+    steps_used:          int            = Field(default=0)
+    budget_remaining:    int            = Field(default=50)
+    total_reward:        float          = Field(default=0.0)

env/reward.py CHANGED Viewed

@@ -1,41 +1,94 @@
 from env.models import Action, Reward, DifficultyLevel, ActionType
 from env.graders import grade
 #  CONSTANTS
-MAX_STEPS        = 20
-HINT_PENALTY     = -0.05   # Per hint requested
-LOOP_PENALTY     = -0.05   # Same action 3+ times in a row
-INVALID_PENALTY  = -0.10   # Null / malformed action
-STEP_EFFICIENCY_BONUS = 0.10  # Bonus for solving in fewer steps than estimated
-# Dense reward per action type (before grader score)
 STEP_REWARDS = {
-    ActionType.IDENTIFY_ERROR:  0.15,  # Rewarded for diagnosing
-    ActionType.PROPOSE_FIX:     0.25,  # Rewarded for attempting fix
-    ActionType.SUBMIT_ANSWER:   0.00,  # Final score comes from grader
-    ActionType.REQUEST_HINT:    0.00,  # No reward, only penalty
-    ActionType.EXPLAIN_ISSUE:   0.10,  # Rewarded for explaining
-    ActionType.OPTIMIZE_QUERY:  0.20,  # Rewarded for optimization attempt
 }
-#  LOOP DETECTOR
-def _detect_loop(previous_actions: list[str], current_action: str) -> bool:
     """
-    Returns True if the agent has submitted the same action type
-    3 or more times in a row — indicating a stuck loop.
     """
-    if len(previous_actions) < 2:
         return False
-    last_two = previous_actions[-2:]
-    return all(a == current_action for a in last_two)
 def _count_consecutive(previous_actions: list[str], current_action: str) -> int:
-    """Count how many times the current action has been repeated consecutively."""
     count = 1
     for a in reversed(previous_actions):
         if a == current_action:
@@ -45,24 +98,22 @@ def _count_consecutive(previous_actions: list[str], current_action: str) -> int:
     return count
 #  EFFICIENCY BONUS
-def _efficiency_bonus(step_count: int, estimated_steps: int) -> float:
-    """
-    Bonus reward if agent solves faster than estimated.
-    Encourages efficient reasoning, not just correct answers.
-    """
-    if step_count <= 0 or estimated_steps <= 0:
-        return 0.0
-    if step_count <= estimated_steps:
-        ratio = step_count / estimated_steps
-        # More bonus the faster — scales from 0.10 down to 0.0
-        return round(STEP_EFFICIENCY_BONUS * (1.0 - ratio + 0.1), 4)
     return 0.0
 #  MAIN REWARD FUNCTION
 def compute_reward(
     action:           Action,
@@ -73,25 +124,33 @@ def compute_reward(
     hints_used:       int,
     estimated_steps:  int,
     action_counts:    dict[str, int],
 ) -> Reward:
     """
-    Computes a DENSE reward signal for every step.
-    Never returns 0.0 for all steps — reward varies at each step.
-    Dense reward components:
-    1. Step reward     — small reward just for taking valid action
-    2. Grader score    — full grader score on submit_answer / optimize_query
-    3. Loop penalty    — repeated same action 3+ times
-    4. Hint penalty    — accumulated hint cost
-    5. Efficiency bonus — solved faster than estimated steps
-    6. Invalid penalty — null / malformed action
-    Score is always clamped to [-1.0, 1.0].
     """
-    breakdown     = {}
     feedback_parts = []
-    final_score   = 0.0
     # ── Edge case: null action ────────────────────────────────────
     if action is None or action.payload is None:
@@ -100,105 +159,155 @@ def compute_reward(
             breakdown={"invalid_action": 0.001},
             feedback="Invalid or null action received."
         )
-    action_type_val = action.action_type.value if hasattr(action.action_type, "value") else str(action.action_type)
     action_type_enum = action.action_type
-    # ── 1. Step reward (dense signal) ────────────────────────────
     step_reward = STEP_REWARDS.get(action_type_enum, 0.05)
     breakdown["step_reward"] = round(step_reward, 4)
     final_score += step_reward
     if step_reward > 0:
-        feedback_parts.append(f"Action '{action_type_val}' rewarded +{step_reward}.")
-    # ── 2. Grader score for terminal actions ──────────────────────
     grader_score = 0.0
-    is_terminal  = action_type_enum in (ActionType.SUBMIT_ANSWER, ActionType.OPTIMIZE_QUERY)
-    if is_terminal:
         raw_score, grader_breakdown, grader_feedback = grade(action, task_id)
         grader_score = raw_score
-        breakdown["grader_score"]    = round(grader_score, 4)
         breakdown["grader_breakdown"] = grader_breakdown
         final_score += grader_score
         feedback_parts.append(grader_feedback)
-        # Efficiency bonus — only on correct terminal action
         if grader_score >= 0.5:
-            eff_bonus = _efficiency_bonus(step_count, estimated_steps)
             if eff_bonus > 0:
                 final_score += eff_bonus
                 breakdown["efficiency_bonus"] = round(eff_bonus, 4)
-                feedback_parts.append(f"Efficiency bonus +{eff_bonus} for solving in {step_count} steps.")
     elif action_type_enum == ActionType.PROPOSE_FIX:
-        # Partial grader score for propose_fix — encourages iterative improvement
         raw_score, grader_breakdown, _ = grade(action, task_id)
-        partial = round(raw_score * 0.4, 4)  # 40% of full grader score
-        grader_score = partial
         breakdown["partial_grader_score"] = partial
         final_score += partial
-        if partial > 0:
-            feedback_parts.append(f"Partial fix credit +{partial}.")
     elif action_type_enum == ActionType.IDENTIFY_ERROR:
-        # Small grader check on error identification
         raw_score, _, _ = grade(action, task_id)
-        partial = round(raw_score * 0.2, 4)  # 20% for identification step
         breakdown["identification_score"] = partial
         final_score += partial
-    # ── 3. Loop penalty ───────────────────────────────────────────
     if _detect_loop(previous_actions, action_type_val):
         consecutive = _count_consecutive(previous_actions, action_type_val)
-        loop_pen    = LOOP_PENALTY * min(consecutive - 2, 3)  # Cap at 3x penalty
         final_score += loop_pen
         breakdown["loop_penalty"] = round(loop_pen, 4)
-        feedback_parts.append(f"Loop detected ({consecutive}x same action). Penalty {loop_pen}.")
-    # ── 4. Hint penalty ───────────────────────────────────────────
     if action_type_enum == ActionType.REQUEST_HINT:
-        hint_pen     = HINT_PENALTY
-        final_score += hint_pen
-        breakdown["hint_penalty"] = round(hint_pen, 4)
-        feedback_parts.append(f"Hint requested. Penalty {hint_pen}.")
-    # ── 5. Max steps penalty ──────────────────────────────────────
-    if step_count >= MAX_STEPS - 1:
-        final_score += -0.10
-        breakdown["max_steps_penalty"] = -0.10
-        feedback_parts.append("Approaching max steps limit. Penalty applied.")
-    # ── Clamp to [-1.0, 1.0] ─────────────────────────────────────
-    # Clamp strictly between 0.001 and 0.999 for validator compliance
     final_score = round(max(0.001, min(0.999, final_score)), 4)
     breakdown["total"] = final_score
     feedback = " ".join(feedback_parts) if feedback_parts else "Step processed."
-    return Reward(
-        score=final_score,
-        breakdown=breakdown,
-        feedback=feedback
-    )
 #  EPISODE DONE CONDITION
 def is_done(
-    action_type:      ActionType,
-    step_count:       int,
-    grader_score:     float = 0.0,
 ) -> bool:
     """
     Episode ends when:
-    1. Agent submits final answer (submit_answer / optimize_query)
     2. Max steps reached
-    3. Perfect score achieved
     """
-    if action_type in (ActionType.SUBMIT_ANSWER, ActionType.OPTIMIZE_QUERY):
         return True
     if step_count >= MAX_STEPS:
         return True
     if grader_score >= 1.0:
         return True
-    return False

 from env.models import Action, Reward, DifficultyLevel, ActionType
 from env.graders import grade
+# ─────────────────────────────────────────────
 #  CONSTANTS
+# ─────────────────────────────────────────────
+MAX_STEPS             = 50    # Round 2: long-horizon episodes
+HINT_PENALTY          = -0.10  # Per hint requested (increased from Round 1)
+LOOP_PENALTY          = -0.08  # Same action on same target 2+ times, no improvement
+INVALID_PENALTY       = -0.10  # Null / malformed action
+BACKTRACK_PENALTY     = -0.05  # Action makes score worse than previous best
+BUDGET_EXHAUSTION_PEN = -0.15  # Reaching max_steps without submitting report
+EFFICIENCY_BONUS      =  0.10  # Solved in < 70% of max_steps
+# Milestone thresholds: {improvement_fraction: bonus_reward}
+MILESTONE_THRESHOLDS = {
+    0.25: 0.15,   # 25% improvement → +0.15 bonus
+    0.50: 0.25,   # 50% improvement → +0.25 bonus
+    0.75: 0.40,   # 75% improvement → +0.40 bonus
+}
+# Step rewards for Round 2 actions (dense signal)
 STEP_REWARDS = {
+    # ── Round 2 actions ──────────────────────────
+    ActionType.INSPECT_QUERY:    0.05,   # Investigation rewarded
+    ActionType.ANALYZE_INDEXES:  0.05,   # Investigation rewarded
+    ActionType.CREATE_INDEX:     0.10,   # Core optimization action
+    ActionType.REWRITE_QUERY:    0.15,   # High-value rewrite
+    ActionType.ADD_COLUMN:       0.08,   # Denormalization
+    ActionType.DROP_INDEX:       0.05,   # Clean up overhead
+    ActionType.PARTITION_TABLE:  0.15,   # Big structural improvement
+    ActionType.ANALYZE_STATS:    0.05,   # Maintenance action
+    ActionType.SUBMIT_REPORT:    0.00,   # Terminal — score comes from grader
+    ActionType.REQUEST_HINT:     0.00,   # No reward, only penalty
+    # ── Round 1 backward compat ──────────────────
+    ActionType.IDENTIFY_ERROR:   0.15,
+    ActionType.PROPOSE_FIX:      0.25,
+    ActionType.SUBMIT_ANSWER:    0.00,
+    ActionType.EXPLAIN_ISSUE:    0.10,
+    ActionType.OPTIMIZE_QUERY:   0.20,
 }
+# Terminal actions that end the episode
+TERMINAL_ACTIONS = {
+    ActionType.SUBMIT_ANSWER,
+    ActionType.OPTIMIZE_QUERY,
+    ActionType.SUBMIT_REPORT,
+}
+# ─────────────────────────────────────────────
+#  MILESTONE TRACKER
+# ─────────────────────────────────────────────
+def check_milestones(
+    baseline_score: float,
+    new_score:      float,
+    earned:         set,
+) -> tuple[float, list[float]]:
     """
+    Returns (total_bonus, newly_earned_thresholds).
+    One-time bonuses — each milestone only paid once per episode.
     """
+    max_possible   = max(1.0, 100.0 - baseline_score)
+    improvement    = (new_score - baseline_score) / max_possible
+    bonus          = 0.0
+    newly_earned   = []
+    for threshold, reward in MILESTONE_THRESHOLDS.items():
+        if improvement >= threshold and threshold not in earned:
+            bonus        += reward
+            newly_earned.append(threshold)
+            earned.add(threshold)
+    return round(bonus, 4), newly_earned
+# ─────────────────────────────────────────────
+#  LOOP DETECTOR
+# ─────────────────────────────────────────────
+def _detect_loop(previous_actions: list[str], current_action: str) -> bool:
+    """Returns True if agent has done the same action 2+ times in a row."""
+    if len(previous_actions) < 1:
         return False
+    last = previous_actions[-1]
+    return last == current_action
 def _count_consecutive(previous_actions: list[str], current_action: str) -> int:
     count = 1
     for a in reversed(previous_actions):
         if a == current_action:
     return count
+# ─────────────────────────────────────────────
 #  EFFICIENCY BONUS
+# ─────────────────────────────────────────────
+def _efficiency_bonus(step_count: int, max_steps: int) -> float:
+    """Bonus if agent finishes in < 70% of budget."""
+    threshold = max_steps * 0.70
+    if step_count <= threshold:
+        ratio = step_count / max(1, max_steps)
+        return round(EFFICIENCY_BONUS * (1.0 - ratio), 4)
     return 0.0
+# ─────────────────────────────────────────────
 #  MAIN REWARD FUNCTION
+# ─────────────────────────────────────────────
 def compute_reward(
     action:           Action,
     hints_used:       int,
     estimated_steps:  int,
     action_counts:    dict[str, int],
+    # Round 2 extras (optional — backward compatible)
+    db_delta:         float = 0.0,     # Performance score delta from DatabaseSimulator
+    baseline_score:   float = 0.0,     # Scenario baseline score
+    current_score:    float = 0.0,     # Current DB performance score
+    milestones_earned: set  = None,    # Set of already-earned milestone thresholds
 ) -> Reward:
     """
+    Computes dense reward signal for every step.
+    Components:
+    1. Step reward     — small reward for valid action type
+    2. Delta reward    — proportional to DB performance improvement (Round 2)
+    3. Milestone bonus — one-time bonus at 25%/50%/75% improvement
+    4. Grader score    — full score on terminal actions (Round 1 compat)
+    5. Loop penalty    — repeated same action with no improvement
+    6. Hint penalty    — cost per hint
+    7. Backtrack penalty — action made things worse
+    8. Budget penalty  — approaching max_steps without submitting
+    9. Efficiency bonus — solved fast
     """
+    if milestones_earned is None:
+        milestones_earned = set()
+    breakdown      = {}
     feedback_parts = []
+    final_score    = 0.0
     # ── Edge case: null action ────────────────────────────────────
     if action is None or action.payload is None:
             breakdown={"invalid_action": 0.001},
             feedback="Invalid or null action received."
         )
+    action_type_val  = action.action_type.value if hasattr(action.action_type, "value") else str(action.action_type)
     action_type_enum = action.action_type
+    # ── 1. Step reward ────────────────────────────────────────────
     step_reward = STEP_REWARDS.get(action_type_enum, 0.05)
     breakdown["step_reward"] = round(step_reward, 4)
     final_score += step_reward
     if step_reward > 0:
+        feedback_parts.append(f"Action '{action_type_val}' +{step_reward}.")
+    # ── 2. Delta reward (Round 2 DB performance change) ───────────
+    if db_delta != 0.0:
+        delta_reward = round((db_delta / 100.0) * 0.40, 4)
+        delta_reward = max(-0.40, min(0.40, delta_reward))
+        breakdown["delta_reward"] = delta_reward
+        final_score += delta_reward
+        if delta_reward > 0:
+            feedback_parts.append(f"DB improved +{db_delta:.1f} pts. Delta reward +{delta_reward}.")
+        elif delta_reward < 0:
+            feedback_parts.append(f"DB worsened {db_delta:.1f} pts. Penalty {delta_reward}.")
+    # ── 3. Milestone bonuses ──────────────────────────────────────
+    if baseline_score > 0 and current_score > 0:
+        milestone_bonus, newly_earned = check_milestones(
+            baseline_score, current_score, milestones_earned
+        )
+        if milestone_bonus > 0:
+            breakdown["milestone_bonus"] = milestone_bonus
+            final_score += milestone_bonus
+            pct = int(max(newly_earned) * 100)
+            feedback_parts.append(f"🎯 Milestone! {pct}% improvement. Bonus +{milestone_bonus}!")
+    # ── 4. Grader score for terminal actions (Round 1 compat) ─────
     grader_score = 0.0
+    is_terminal  = action_type_enum in TERMINAL_ACTIONS
+    if is_terminal and action_type_enum != ActionType.SUBMIT_REPORT:
         raw_score, grader_breakdown, grader_feedback = grade(action, task_id)
         grader_score = raw_score
+        breakdown["grader_score"]     = round(grader_score, 4)
         breakdown["grader_breakdown"] = grader_breakdown
         final_score += grader_score
         feedback_parts.append(grader_feedback)
         if grader_score >= 0.5:
+            eff_bonus = _efficiency_bonus(step_count, MAX_STEPS)
             if eff_bonus > 0:
                 final_score += eff_bonus
                 breakdown["efficiency_bonus"] = round(eff_bonus, 4)
+                feedback_parts.append(f"Efficiency bonus +{eff_bonus}.")
+    elif is_terminal and action_type_enum == ActionType.SUBMIT_REPORT:
+        # Round 2 terminal: compute from DB performance
+        if baseline_score > 0 and current_score > 0:
+            perf_improvement = (current_score - baseline_score) / max(1.0, 100.0 - baseline_score)
+            step_efficiency  = 1.0 - (step_count / max(1, MAX_STEPS))
+            terminal_score   = round(
+                (perf_improvement * 0.60) + (step_efficiency * 0.20) + 0.10, 4
+            )
+            terminal_score = max(0.001, min(0.999, terminal_score))
+            breakdown["terminal_score"]    = terminal_score
+            breakdown["perf_improvement"]  = round(perf_improvement, 4)
+            breakdown["step_efficiency"]   = round(step_efficiency, 4)
+            final_score += terminal_score
+            feedback_parts.append(
+                f"Report submitted. Performance: {baseline_score:.1f} → {current_score:.1f}. "
+                f"Terminal score: {terminal_score}."
+            )
+            # Efficiency bonus on submit_report too
+            eff_bonus = _efficiency_bonus(step_count, MAX_STEPS)
+            if eff_bonus > 0:
+                final_score += eff_bonus
+                breakdown["efficiency_bonus"] = round(eff_bonus, 4)
+                feedback_parts.append(f"Efficiency bonus +{eff_bonus}.")
+        else:
+            breakdown["terminal_score"] = 0.10
+            final_score += 0.10
+            feedback_parts.append("Report submitted.")
     elif action_type_enum == ActionType.PROPOSE_FIX:
         raw_score, grader_breakdown, _ = grade(action, task_id)
+        partial = round(raw_score * 0.4, 4)
         breakdown["partial_grader_score"] = partial
         final_score += partial
     elif action_type_enum == ActionType.IDENTIFY_ERROR:
         raw_score, _, _ = grade(action, task_id)
+        partial = round(raw_score * 0.2, 4)
         breakdown["identification_score"] = partial
         final_score += partial
+    # ── 5. Loop penalty ───────────────────────────────────────────
     if _detect_loop(previous_actions, action_type_val):
         consecutive = _count_consecutive(previous_actions, action_type_val)
+        loop_pen    = LOOP_PENALTY * min(consecutive - 1, 3)
         final_score += loop_pen
         breakdown["loop_penalty"] = round(loop_pen, 4)
+        feedback_parts.append(f"Loop detected ({consecutive}x). Penalty {loop_pen}.")
+    # ── 6. Hint penalty ───────────────────────────────────────────
     if action_type_enum == ActionType.REQUEST_HINT:
+        final_score += HINT_PENALTY
+        breakdown["hint_penalty"] = HINT_PENALTY
+        feedback_parts.append(f"Hint requested. Penalty {HINT_PENALTY}.")
+    # ── 7. Backtrack penalty ──────────────────────────────────────
+    if db_delta < -1.0:
+        final_score += BACKTRACK_PENALTY
+        breakdown["backtrack_penalty"] = BACKTRACK_PENALTY
+        feedback_parts.append(f"Performance regressed. Backtrack penalty {BACKTRACK_PENALTY}.")
+    # ── 8. Budget exhaustion penalty ─────────────────────────────
+    if step_count >= MAX_STEPS - 2 and not is_terminal:
+        final_score += BUDGET_EXHAUSTION_PEN
+        breakdown["budget_penalty"] = BUDGET_EXHAUSTION_PEN
+        feedback_parts.append("Budget nearly exhausted. Submit report now!")
+    # ── Clamp to (0.001, 0.999) ───────────────────────────────────
     final_score = round(max(0.001, min(0.999, final_score)), 4)
     breakdown["total"] = final_score
     feedback = " ".join(feedback_parts) if feedback_parts else "Step processed."
+    return Reward(score=final_score, breakdown=breakdown, feedback=feedback)
+# ─────────────────────────────────────────────
 #  EPISODE DONE CONDITION
+# ─────────────────────────────────────────────
 def is_done(
+    action_type:  ActionType,
+    step_count:   int,
+    grader_score: float = 0.0,
+    target_reached: bool = False,
 ) -> bool:
     """
     Episode ends when:
+    1. Agent submits report / final answer
     2. Max steps reached
+    3. Perfect score / target reached
     """
+    if action_type in TERMINAL_ACTIONS:
         return True
     if step_count >= MAX_STEPS:
         return True
     if grader_score >= 1.0:
         return True
+    if target_reached:
+        return True
+    return False

env/tasks.py CHANGED Viewed

@@ -3,28 +3,50 @@ import random
 from pathlib import Path
 from env.models import DifficultyLevel, TaskInfo
-#  LOAD DATASETS
 BASE_DIR = Path(__file__).parent.parent / "dataset"
 def _load(filename: str) -> list[dict]:
     path = BASE_DIR / filename
     with open(path, "r", encoding="utf-8") as f:
         return json.load(f)
 EASY_CASES   = _load("easy_cases.json")
 MEDIUM_CASES = _load("medium_cases.json")
 HARD_CASES   = _load("hard_cases.json")
 ALL_CASES: dict[str, list[dict]] = {
-    DifficultyLevel.EASY:   EASY_CASES,
-    DifficultyLevel.MEDIUM: MEDIUM_CASES,
-    DifficultyLevel.HARD:   HARD_CASES,
 }
 #  ACTION SCHEMA (required by /tasks validator)
 ACTION_SCHEMA = {
     "identify_error": {
         "description": "Identify where and what the error is without fixing it yet",
         "payload_fields": {
@@ -36,52 +58,120 @@ ACTION_SCHEMA = {
     "propose_fix": {
         "description": "Propose a fix without submitting as final answer",
         "payload_fields": {
-            "fixed_query":  {"type": "string", "required": True,  "description": "The proposed corrected SQL query"},
-            "change_made":  {"type": "string", "required": True,  "description": "What specifically was changed"},
-            "confidence":   {"type": "float",  "required": False, "description": "Confidence score 0.0-1.0"}
         }
     },
     "submit_answer": {
         "description": "Submit the final fixed query as the definitive answer",
         "payload_fields": {
-            "fixed_query":   {"type": "string", "required": True,  "description": "Final corrected SQL query"},
-            "explanation":   {"type": "string", "required": True,  "description": "Full explanation of what was wrong and how it was fixed"},
-            "error_type":    {"type": "string", "required": False, "description": "Type: syntax | logic | performance"},
-            "confidence":    {"type": "float",  "required": False, "description": "Confidence score 0.0-1.0"}
         }
     },
     "request_hint": {
-        "description": "Request a hint — costs 0.05 reward penalty per hint",
         "payload_fields": {
-            "hint_type": {"type": "string", "required": False, "description": "Type of hint wanted: location | error_type | fix_direction"}
         }
     },
     "explain_issue": {
-        "description": "Explain the issue in detail — earns partial credit even without fixing",
         "payload_fields": {
-            "explanation":    {"type": "string", "required": True,  "description": "Detailed explanation of the SQL problem"},
-            "impact":         {"type": "string", "required": False, "description": "What impact the bug has on query results or performance"},
-            "root_cause":     {"type": "string", "required": False, "description": "Root cause analysis"}
         }
     },
     "optimize_query": {
-        "description": "Submit an optimized version of the query (used for hard/performance tasks)",
         "payload_fields": {
-            "optimized_query":    {"type": "string", "required": True,  "description": "The performance-optimized SQL query"},
-            "optimization_type":  {"type": "string", "required": True,  "description": "What optimization was applied"},
-            "expected_improvement":{"type": "string", "required": False, "description": "Expected performance gain description"},
-            "explanation":        {"type": "string", "required": False, "description": "Why this optimization works"},
-            "confidence":         {"type": "float",  "required": False, "description": "Confidence 0.0-1.0"}
         }
-    }
 }
-#  TASK MANAGER
 class TaskManager:
     """
-    Manages task selection, hint generation, and task metadata.
-    All tasks are loaded from JSON datasets — no hardcoded tasks.
     """
     def __init__(self):
@@ -89,9 +179,8 @@ class TaskManager:
     def get_task(self, difficulty: DifficultyLevel, task_id: str | None = None) -> dict:
         """
-        Returns a task dict for the given difficulty.
-        If task_id is provided, returns that specific task.
-        Otherwise picks randomly, avoiding recently used tasks.
         """
         pool = ALL_CASES[difficulty]
@@ -101,7 +190,7 @@ class TaskManager:
                     return case
             raise ValueError(f"Task '{task_id}' not found in {difficulty} pool")
-        # Avoid repeating recently used tasks
         available = [c for c in pool if c["id"] not in self._used_ids]
         if not available:
             self._used_ids.clear()
@@ -112,66 +201,92 @@ class TaskManager:
         return task
     def get_random_task(self) -> dict:
-        """Pick a random task from any difficulty."""
         difficulty = random.choice(list(DifficultyLevel))
         return self.get_task(difficulty)
     def build_observation_context(self, task: dict) -> dict:
         """
-        Build the current_context dict for the Observation.
-        CRITICAL: Must NOT leak the fixed_query (ground truth) to the agent.
         """
         context = {
-            "buggy_query":       task["buggy_query"],
-            "error_message":     task["error_message"],
-            "database_schema":   task["database_schema"],
-            "error_type_hint":   task["error_type"],
-            "category":          task["category"],
-            "estimated_steps":   task["estimated_fix_steps"],
         }
-        # For performance tasks include extra context
         if task.get("performance_issue"):
             context["performance_issue"] = {
                 "type":   task["performance_issue"]["type"],
                 "impact": task["performance_issue"]["impact"],
-                # Do NOT include timing numbers — agent must figure it out
             }
-        # Include expected output shape (but not the fixed query!)
         if task.get("expected_output") and isinstance(task["expected_output"], list):
             context["expected_output_sample"] = task["expected_output"][:1]
         return context
     def get_hint(self, task: dict, hint_number: int) -> str:
-        """
-        Returns progressive hints. Each hint gives more info.
-        Hints cost -0.05 reward each (handled in reward.py).
-        """
-        hints = [
-            f"Hint 1: The error is in the {task.get('error_location', 'query')}.",
-            f"Hint 2: This is a {task.get('error_type', 'unknown')} type error. Category: {task.get('category')}.",
-            f"Hint 3: Fix description — {task.get('fix_description', 'Review the query carefully.')}",
-        ]
         idx = min(hint_number - 1, len(hints) - 1)
-        return hints[idx]
     def list_all_tasks(self) -> list[TaskInfo]:
-        """Returns TaskInfo list for the /tasks endpoint."""
         result = []
         for difficulty, cases in ALL_CASES.items():
             for case in cases:
                 result.append(TaskInfo(
-                    id=case["id"],
-                    difficulty=difficulty,
-                    description=case["description"],
-                    action_schema=ACTION_SCHEMA
                 ))
         return result
     def get_ground_truth(self, task_id: str) -> dict | None:
-        """Returns the full ground truth for a task (used by grader only)."""
         for cases in ALL_CASES.values():
             for case in cases:
                 if case["id"] == task_id:
@@ -180,4 +295,4 @@ class TaskManager:
 # Singleton instance
-task_manager = TaskManager()

 from pathlib import Path
 from env.models import DifficultyLevel, TaskInfo
+# ─────────────────────────────────────────────
+#  LOAD DATASETS — Round 1 + Round 2
+# ─────────────────────────────────────────────
 BASE_DIR = Path(__file__).parent.parent / "dataset"
 def _load(filename: str) -> list[dict]:
     path = BASE_DIR / filename
     with open(path, "r", encoding="utf-8") as f:
         return json.load(f)
+# Round 1 cases (keep for backward compatibility)
 EASY_CASES   = _load("easy_cases.json")
 MEDIUM_CASES = _load("medium_cases.json")
 HARD_CASES   = _load("hard_cases.json")
+# Round 2 scenarios (new long-horizon DB engineering tasks)
+EASY_SCENARIOS   = _load("easy_scenarios.json")
+MEDIUM_SCENARIOS = _load("medium_scenarios.json")
+HARD_SCENARIOS   = _load("hard_scenarios.json")
+# Combined pools — Round 2 scenarios take priority (listed first)
 ALL_CASES: dict[str, list[dict]] = {
+    DifficultyLevel.EASY:   EASY_SCENARIOS   + EASY_CASES,
+    DifficultyLevel.MEDIUM: MEDIUM_SCENARIOS + MEDIUM_CASES,
+    DifficultyLevel.HARD:   HARD_SCENARIOS   + HARD_CASES,
 }
+# Round 2 only (for training pipeline)
+SCENARIO_ONLY: dict[str, list[dict]] = {
+    DifficultyLevel.EASY:   EASY_SCENARIOS,
+    DifficultyLevel.MEDIUM: MEDIUM_SCENARIOS,
+    DifficultyLevel.HARD:   HARD_SCENARIOS,
+}
+# ─────────────────────────────────────────────
 #  ACTION SCHEMA (required by /tasks validator)
+# ─────────────────────────────────────────────
 ACTION_SCHEMA = {
+    # ── Round 1 actions ──────────────────────────────────────────
     "identify_error": {
         "description": "Identify where and what the error is without fixing it yet",
         "payload_fields": {
     "propose_fix": {
         "description": "Propose a fix without submitting as final answer",
         "payload_fields": {
+            "fixed_query": {"type": "string", "required": True,  "description": "The proposed corrected SQL query"},
+            "change_made": {"type": "string", "required": True,  "description": "What specifically was changed"},
+            "confidence":  {"type": "float",  "required": False, "description": "Confidence score 0.0-1.0"}
         }
     },
     "submit_answer": {
         "description": "Submit the final fixed query as the definitive answer",
         "payload_fields": {
+            "fixed_query": {"type": "string", "required": True,  "description": "Final corrected SQL query"},
+            "explanation": {"type": "string", "required": True,  "description": "Full explanation of fix"},
+            "error_type":  {"type": "string", "required": False, "description": "syntax | logic | performance"},
+            "confidence":  {"type": "float",  "required": False, "description": "Confidence 0.0-1.0"}
         }
     },
     "request_hint": {
+        "description": "Request a hint — costs 0.10 reward penalty per hint",
         "payload_fields": {
+            "hint_type": {"type": "string", "required": False, "description": "location | error_type | fix_direction"}
         }
     },
     "explain_issue": {
+        "description": "Explain the issue in detail",
         "payload_fields": {
+            "explanation": {"type": "string", "required": True,  "description": "Detailed explanation"},
+            "impact":      {"type": "string", "required": False, "description": "Impact on query performance"},
+            "root_cause":  {"type": "string", "required": False, "description": "Root cause analysis"}
         }
     },
     "optimize_query": {
+        "description": "Submit an optimized version of the query",
+        "payload_fields": {
+            "optimized_query":     {"type": "string", "required": True,  "description": "Optimized SQL"},
+            "optimization_type":   {"type": "string", "required": True,  "description": "What optimization was applied"},
+            "expected_improvement":{"type": "string", "required": False, "description": "Expected performance gain"},
+            "explanation":         {"type": "string", "required": False, "description": "Why this optimization works"},
+            "confidence":          {"type": "float",  "required": False, "description": "Confidence 0.0-1.0"}
+        }
+    },
+    # ── Round 2 actions ──────────────────────────────────────────
+    "inspect_query": {
+        "description": "EXPLAIN a slow query — reveals scan type, rows examined, index usage",
+        "payload_fields": {
+            "query_id": {"type": "string", "required": True, "description": "ID of slow query to inspect (e.g. 'q1')"}
+        }
+    },
+    "analyze_indexes": {
+        "description": "Show all indexes on a table + usage frequency + missing index hints",
         "payload_fields": {
+            "table": {"type": "string", "required": True, "description": "Table name to analyze"}
         }
+    },
+    "create_index": {
+        "description": "Add a composite index on specified columns — core optimization action",
+        "payload_fields": {
+            "table":   {"type": "string",      "required": True, "description": "Table to index"},
+            "columns": {"type": "list|string", "required": True, "description": "Columns to index (list or comma-separated string)"}
+        }
+    },
+    "rewrite_query": {
+        "description": "Submit a rewritten SQL query — system evaluates execution time improvement",
+        "payload_fields": {
+            "query_id": {"type": "string", "required": True, "description": "ID of query to rewrite"},
+            "new_sql":  {"type": "string", "required": True, "description": "Rewritten SQL query"}
+        }
+    },
+    "add_column": {
+        "description": "Add a denormalization column to reduce expensive JOINs",
+        "payload_fields": {
+            "table":   {"type": "string", "required": True,  "description": "Table to modify"},
+            "column":  {"type": "string", "required": True,  "description": "New column name"},
+            "purpose": {"type": "string", "required": False, "description": "Why this column helps"}
+        }
+    },
+    "drop_index": {
+        "description": "Remove an unused index to reduce write overhead",
+        "payload_fields": {
+            "table":      {"type": "string", "required": True, "description": "Table name"},
+            "index_name": {"type": "string", "required": True, "description": "Index name to drop (cannot drop PRIMARY)"}
+        }
+    },
+    "partition_table": {
+        "description": "Partition a large table by date or ID range for range query efficiency",
+        "payload_fields": {
+            "table":          {"type": "string", "required": True,  "description": "Table to partition"},
+            "partition_by":   {"type": "string", "required": False, "description": "Column to partition on (e.g. 'created_at')"},
+            "partition_type": {"type": "string", "required": False, "description": "RANGE | LIST | HASH"}
+        }
+    },
+    "analyze_statistics": {
+        "description": "Update table statistics for query planner accuracy",
+        "payload_fields": {
+            "table": {"type": "string", "required": True, "description": "Table to analyze"}
+        }
+    },
+    "submit_report": {
+        "description": "TERMINAL: Submit final optimization report — ends episode, computes full score",
+        "payload_fields": {
+            "summary":       {"type": "string", "required": True,  "description": "Summary of optimizations applied"},
+            "actions_taken": {"type": "list",   "required": False, "description": "List of key actions taken"},
+            "expected_gain": {"type": "string", "required": False, "description": "Expected performance improvement"}
+        }
+    },
 }
+# ─────────────────────────────────────────────
+#  TASK MANAGER
+# ─────────────────────────────────────────────
 class TaskManager:
     """
+    Manages task selection for both Round 1 and Round 2 scenarios.
+    Round 2 scenarios have tables/slow_queries structure.
+    Round 1 cases have buggy_query structure.
     """
     def __init__(self):
     def get_task(self, difficulty: DifficultyLevel, task_id: str | None = None) -> dict:
         """
+        Returns a task for the given difficulty.
+        Prefers Round 2 scenarios, falls back to Round 1 cases.
         """
         pool = ALL_CASES[difficulty]
                     return case
             raise ValueError(f"Task '{task_id}' not found in {difficulty} pool")
+        # Avoid recently used tasks
         available = [c for c in pool if c["id"] not in self._used_ids]
         if not available:
             self._used_ids.clear()
         return task
     def get_random_task(self) -> dict:
         difficulty = random.choice(list(DifficultyLevel))
         return self.get_task(difficulty)
+    def get_scenario(self, difficulty: DifficultyLevel, scenario_id: str | None = None) -> dict:
+        """Get Round 2 scenario specifically."""
+        pool = SCENARIO_ONLY[difficulty]
+        if scenario_id:
+            for s in pool:
+                if s["id"] == scenario_id:
+                    return s
+            raise ValueError(f"Scenario '{scenario_id}' not found")
+        return random.choice(pool)
     def build_observation_context(self, task: dict) -> dict:
         """
+        Builds current_context for the Observation.
+        Handles both Round 2 scenario format and Round 1 case format.
+        CRITICAL: Never leaks ground truth (fixed_query / optimal_actions).
         """
+        # ── Round 2 scenario format ───────────────────────────────
+        if "slow_queries" in task:
+            return {
+                "scenario_id":          task["id"],
+                "description":          task.get("description", ""),
+                "tables":               task.get("tables", []),
+                "slow_queries":         task.get("slow_queries", []),
+                "performance_score_baseline": task.get("performance_score_baseline", 0.0),
+                "target_score":         task.get("target_score", 85.0),
+                "max_steps":            task.get("max_steps", 50),
+                "category":             task.get("category", ""),
+                # Do NOT include missing_index_hints (that's the answer)
+                # Do NOT include optimal_actions (that's the answer)
+            }
+        # ── Round 1 case format (backward compatible) ────────────
         context = {
+            "buggy_query":     task.get("buggy_query", ""),
+            "error_message":   task.get("error_message", ""),
+            "database_schema": task.get("database_schema", ""),
+            "error_type_hint": task.get("error_type", ""),
+            "category":        task.get("category", ""),
+            "estimated_steps": task.get("estimated_fix_steps", 5),
         }
         if task.get("performance_issue"):
             context["performance_issue"] = {
                 "type":   task["performance_issue"]["type"],
                 "impact": task["performance_issue"]["impact"],
             }
         if task.get("expected_output") and isinstance(task["expected_output"], list):
             context["expected_output_sample"] = task["expected_output"][:1]
         return context
     def get_hint(self, task: dict, hint_number: int) -> str:
+        """Progressive hints. Each hint reveals more info. Costs -0.10 each."""
+        # Round 2 scenario hints
+        if "slow_queries" in task:
+            hints = [
+                f"Hint 1: Start by inspecting your slow queries with inspect_query action.",
+                f"Hint 2: Use analyze_indexes on tables appearing in slow queries.",
+                f"Hint 3: Category is '{task.get('category', 'indexing')}'. Target score: {task.get('target_score', 85.0)}.",
+            ]
+        else:
+            # Round 1 hints
+            hints = [
+                f"Hint 1: The error is in the {task.get('error_location', 'query')}.",
+                f"Hint 2: This is a {task.get('error_type', 'unknown')} error. Category: {task.get('category')}.",
+                f"Hint 3: Fix: {task.get('fix_description', 'Review the query carefully.')}",
+            ]
         idx = min(hint_number - 1, len(hints) - 1)
+        return hints[max(0, idx)]
     def list_all_tasks(self) -> list[TaskInfo]:
+        """Returns TaskInfo list for the /tasks endpoint — all 30 tasks."""
         result = []
         for difficulty, cases in ALL_CASES.items():
             for case in cases:
                 result.append(TaskInfo(
+                    id            = case["id"],
+                    difficulty    = difficulty,
+                    description   = case.get("description", ""),
+                    action_schema = ACTION_SCHEMA
                 ))
         return result
     def get_ground_truth(self, task_id: str) -> dict | None:
+        """Returns full task including ground truth (used by grader only)."""
         for cases in ALL_CASES.values():
             for case in cases:
                 if case["id"] == task_id:
 # Singleton instance
+task_manager = TaskManager()

tests/test_environment.py CHANGED Viewed

@@ -22,8 +22,7 @@ def test_reset_easy(env):
     assert obs.step_count == 0
     assert obs.difficulty == DifficultyLevel.EASY
     assert "fixed_query" not in obs.current_context
-    assert "buggy_query" in obs.current_context
 def test_reset_medium(env):
     obs = env.reset(difficulty="medium")
@@ -65,7 +64,7 @@ def test_step_null_action(env):
     """Null action must return -0.1, never crash."""
     env.reset(difficulty="easy")
     resp = env.step(None)
-    assert resp.reward.score == -0.1
     assert resp.done == False
@@ -110,7 +109,7 @@ def test_max_steps(env):
     action = Action(action_type=ActionType.IDENTIFY_ERROR,
                     payload={"error_location": "x", "error_type": "syntax"})
     done = False
-    for _ in range(25):
         resp = env.step(action)
         if resp.done:
             done = True

     assert obs.step_count == 0
     assert obs.difficulty == DifficultyLevel.EASY
     assert "fixed_query" not in obs.current_context
+    assert "buggy_query" in obs.current_context or "slow_queries" in obs.current_context
 def test_reset_medium(env):
     obs = env.reset(difficulty="medium")
     """Null action must return -0.1, never crash."""
     env.reset(difficulty="easy")
     resp = env.step(None)
+    assert resp.reward.score >= 0.001
     assert resp.done == False
     action = Action(action_type=ActionType.IDENTIFY_ERROR,
                     payload={"error_location": "x", "error_type": "syntax"})
     done = False
+    for _ in range(55):
         resp = env.step(action)
         if resp.done:
             done = True

tests/test_graders.py CHANGED Viewed

@@ -21,7 +21,7 @@ def test_easy_perfect_score():
 def test_null_action_returns_zero():
     score, breakdown, feedback = grade(None, "easy_001")
-    assert score == 0.0
     assert "null" in feedback.lower() or "no action" in feedback.lower()
@@ -29,7 +29,7 @@ def test_unknown_task_returns_zero():
     action = Action(action_type=ActionType.SUBMIT_ANSWER,
                     payload={"fixed_query": "SELECT 1", "explanation": "test"})
     score, _, _ = grade(action, "nonexistent_task_999")
-    assert score == 0.0
 def test_determinism():

 def test_null_action_returns_zero():
     score, breakdown, feedback = grade(None, "easy_001")
+    assert score <= 0.001  # clamped minimum for OpenEnv compliance
     assert "null" in feedback.lower() or "no action" in feedback.lower()
     action = Action(action_type=ActionType.SUBMIT_ANSWER,
                     payload={"fixed_query": "SELECT 1", "explanation": "test"})
     score, _, _ = grade(action, "nonexistent_task_999")
+    assert score <= 0.001
 def test_determinism():

training/evaluate_agent.py ADDED Viewed

File without changes

training/generate_training_data.py ADDED Viewed

File without changes

training/train_agent.py ADDED Viewed

File without changes