Spaces:

junaid0600
/

sql-db-engineer-agent

Sleeping

App Files Files Community

junaid0600 committed on Apr 5

Commit

5447299

1 Parent(s): 04e5467

Update inference.py with [START]/[STEP]/[END] format and dotenv loading

Browse files

Files changed (1) hide show

inference.py +269 -10

inference.py CHANGED Viewed

@@ -1,16 +1,275 @@
-# inference.py — required by OpenEnv validator
-from baseline import run_baseline
-# Entry point for openenv validator
-app = None  # placeholder for server entry point
 def main():
-    print("Running inference / baseline agent...")
-    response = run_baseline()
-    print(f"\nInference Results:")
-    for r in response.results:
-        print(f"  {r.difficulty.value:8} | {r.task_id:12} | score={r.score} | steps={r.steps}")
-    print(f"\nAverage Score: {response.average_score}")
 if __name__ == "__main__":
     main()

+"""
+inference.py — SQL Query Debugger OpenEnv
+Follows the mandatory [START]/[STEP]/[END] stdout format.
+Uses OpenAI client with API_BASE_URL, MODEL_NAME, HF_TOKEN.
+"""
+import os
+import json
+import textwrap
+from typing import List, Optional
+from openai import OpenAI
+from dotenv import load_dotenv
+load_dotenv()
+from env.environment import SQLDebuggerEnvironment
+from env.models import Action, ActionType, DifficultyLevel
+# ─────────────────────────────────────────────
+#  ENVIRONMENT VARIABLES
+# ─────────────────────────────────────────────
+# Credential handed to the OpenAI client: the first non-empty value among HF_TOKEN,
+# API_KEY and OPENAI_API_KEY, otherwise the placeholder "dummy-key".
+API_KEY      = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY") or "dummy-key"
+# Endpoint and model name can be overridden through the environment; these are the defaults.
+API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
+MODEL_NAME   = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
+# Printed as the env= field of the [START] record.
+BENCHMARK    = "sql-query-debugger"
+# Upper bound on steps per episode; also the divisor used to normalise the episode score.
+MAX_STEPS    = 10
+# Minimum normalised score for an episode to be reported as a success.
+SUCCESS_SCORE_THRESHOLD = 0.5
+# ─────────────────────────────────────────────
+#  LOGGING FUNCTIONS — exact format required
+# ─────────────────────────────────────────────
+def log_start(task: str, env: str, model: str) -> None:
+    """Print the [START] record that opens an episode on stdout.
+
+    Args:
+        task: Identifier of the task being attempted.
+        env: Name of the benchmark/environment.
+        model: Name of the model driving the episode.
+    """
+    record = f"[START] task={task} env={env} model={model}"
+    # Flushed immediately so the record is not held back by stdout buffering.
+    print(record, flush=True)
+def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
+    """Print one [STEP] record on a single stdout line.
+
+    Args:
+        step: 1-based index of the step within the episode.
+        action: Name of the action that was taken.
+        reward: Reward obtained for the step; printed with two decimals.
+        done: Whether the episode finished on this step; printed as true/false.
+        error: Error text for the step, or None/empty when the step succeeded
+            (printed as ``null``).
+    """
+    # Exception messages may span several lines; collapse every whitespace run
+    # to one space so the record can never be split across lines.
+    error_val = " ".join(error.split()) if error else ""
+    if not error_val:
+        error_val = "null"
+    done_val = str(done).lower()
+    print(f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}", flush=True)
+def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
+    """Print the [END] record that closes an episode on stdout.
+
+    Args:
+        success: Whether the episode counts as solved; printed lower-cased.
+        steps: Number of steps that were taken.
+        score: Final episode score; printed with three decimals.
+        rewards: Per-step rewards; printed comma-separated with two decimals each.
+    """
+    success_val = str(success).lower()
+    rewards_str = ",".join(format(value, ".2f") for value in rewards)
+    record = f"[END] success={success_val} steps={steps} score={score:.3f} rewards={rewards_str}"
+    print(record, flush=True)
+# ─────────────────────────────────────────────
+#  SYSTEM PROMPT
+# ─────────────────────────────────────────────
+# System message sent with every chat completion request. It asks the model to reply
+# with one bare JSON object in either the "submit_answer" or the "optimize_query" shape;
+# get_llm_action reads these same keys when it builds the Action payload.
+SYSTEM_PROMPT = textwrap.dedent("""
+    You are an expert SQL debugger. You will be given a buggy SQL query and must fix it.
+    You must respond with a JSON object only — no explanation outside the JSON.
+    For syntax/logic errors, respond with:
+    {
+        "action_type": "submit_answer",
+        "fixed_query": "<your fixed SQL query here>",
+        "explanation": "<brief explanation of what was wrong>",
+        "error_type": "<syntax|logic|performance>",
+        "error_location": "<where in the query the error is>",
+        "confidence": 0.9
+    }
+    For performance issues, respond with:
+    {
+        "action_type": "optimize_query",
+        "optimized_query": "<your optimized SQL query here>",
+        "optimization_type": "<what optimization was applied>",
+        "explanation": "<why this optimization works>",
+        "root_cause": "<what caused the performance issue>",
+        "expected_improvement": "<expected performance gain>",
+        "confidence": 0.85
+    }
+    Always provide valid JSON. Never include markdown code blocks.
+""").strip()
+def build_user_prompt(obs) -> str:
+    """Render an observation as the user message sent to the model.
+
+    The prompt is assembled line by line instead of dedenting one big
+    f-string: interpolated values that span several lines (a multi-line
+    query, the indented schema JSON) contain lines starting at column 0,
+    which leaves ``textwrap.dedent`` with no common margin to strip, so the
+    template indentation would otherwise remain in the prompt.
+
+    Args:
+        obs: Observation exposing ``task_description``, ``difficulty`` and a
+            ``current_context`` mapping.
+
+    Returns:
+        The prompt text, with every template line starting at column 0.
+    """
+    ctx = obs.current_context
+    schema_json = json.dumps(ctx.get('database_schema', {}), indent=2)
+    lines = [
+        f"Task: {obs.task_description}",
+        f"Difficulty: {obs.difficulty}",
+        "Buggy Query:",
+        f"{ctx.get('buggy_query', 'N/A')}",
+        "Error Message:",
+        f"{ctx.get('error_message', 'N/A')}",
+        "Database Schema:",
+        schema_json,
+        f"Error Type Hint: {ctx.get('error_type_hint', 'unknown')}",
+        f"Category: {ctx.get('category', 'unknown')}",
+        f"Steps Remaining: {ctx.get('steps_remaining', 20)}",
+        "Analyze the buggy query and provide your fix as a JSON object.",
+    ]
+    return "\n".join(lines).strip()
+# ─────────────────────────────────────────────
+#  LLM CALL
+# ─────────────────────────────────────────────
+def _extract_json_object(text: str) -> dict:
+    """Parse the JSON object embedded in raw model output (fences or prose around it are ignored)."""
+    start, end = text.find("{"), text.rfind("}")
+    if start == -1 or end < start:
+        raise ValueError("no JSON object found in model response")
+    return json.loads(text[start:end + 1])
+def _as_confidence(value, default: float = 0.7) -> float:
+    """Convert a model-supplied confidence to float, falling back to *default* when unusable."""
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return default
+def get_llm_action(client: OpenAI, obs, step: int) -> Action:
+    """Ask the model for a fix and turn its JSON reply into an Action.
+
+    Args:
+        client: OpenAI-compatible client used for the chat completion.
+        obs: Current observation; rendered into the user prompt.
+        step: Current step number. Accepted for the caller's convenience;
+            it does not influence the request.
+
+    Returns:
+        An OPTIMIZE_QUERY action when the reply's ``action_type`` is
+        ``"optimize_query"``, otherwise a SUBMIT_ANSWER action. If the request
+        fails or no JSON object can be parsed from the reply, an
+        IDENTIFY_ERROR fallback action is returned instead of raising.
+    """
+    user_prompt = build_user_prompt(obs)
+    try:
+        completion = client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user",   "content": user_prompt},
+            ],
+            temperature=0.3,
+            max_tokens=512,
+            stream=False,
+        )
+        text = (completion.choices[0].message.content or "").strip()
+        # Models do not always honour "JSON only": take the outermost {...}
+        # span so markdown fences or surrounding prose do not break parsing.
+        data        = _extract_json_object(text)
+        action_type = data.get("action_type", "submit_answer")
+        # A missing, null or non-numeric confidence must not discard an
+        # otherwise usable answer, so it degrades to the default instead.
+        confidence  = _as_confidence(data.get("confidence"))
+        if action_type == "optimize_query":
+            return Action(
+                action_type=ActionType.OPTIMIZE_QUERY,
+                payload={
+                    "optimized_query":      data.get("optimized_query", "SELECT 1"),
+                    "optimization_type":    data.get("optimization_type", "Performance fix"),
+                    "explanation":          data.get("explanation", ""),
+                    "root_cause":           data.get("root_cause", ""),
+                    "expected_improvement": data.get("expected_improvement", ""),
+                    "confidence":           confidence,
+                }
+            )
+        return Action(
+            action_type=ActionType.SUBMIT_ANSWER,
+            payload={
+                "fixed_query":    data.get("fixed_query", "SELECT 1"),
+                "explanation":    data.get("explanation", ""),
+                "error_type":     data.get("error_type", "syntax"),
+                "error_location": data.get("error_location", "unknown"),
+                "confidence":     confidence,
+            }
+        )
+    except Exception as exc:
+        # Boundary handler: network/API errors and unparseable replies both
+        # end up here so a single bad reply cannot abort the episode.
+        print(f"[DEBUG] LLM call failed: {exc}", flush=True)
+        # Fallback to identify_error action
+        return Action(
+            action_type=ActionType.IDENTIFY_ERROR,
+            payload={
+                "error_location": "unknown",
+                "error_type":     "syntax",
+                "explanation":    "LLM call failed, using fallback"
+            }
+        )
+# ─────────────────────────────────────────────
+#  MAIN INFERENCE LOOP
+# ─────────────────────────────────────────────
+def run_episode(client: OpenAI, difficulty: str, task_id: str) -> dict:
+    """Run one episode and report it in the [START]/[STEP]/[END] format.
+
+    The episode score is the sum of step rewards divided by MAX_STEPS,
+    clamped to [0.0, 1.0]; the episode succeeds when that score reaches
+    SUCCESS_SCORE_THRESHOLD. A failing ``env.step`` is logged as a step with
+    reward -0.1 and the loop continues. Any other error (including failure
+    to create or reset the environment) is printed as a [DEBUG] line and
+    ends the episode with the default score of 0.0.
+
+    Args:
+        client: OpenAI-compatible client passed on to ``get_llm_action``.
+        difficulty: Difficulty level forwarded to ``env.reset``.
+        task_id: Task identifier forwarded to ``env.reset``.
+
+    Returns:
+        A dict with the keys ``task_id``, ``difficulty``, ``score``,
+        ``steps`` and ``success``.
+    """
+    rewards: List[float] = []
+    steps    = 0
+    success  = False
+    score    = 0.0
+    # [START] is emitted before any environment work so that every episode
+    # produces a START/END pair even when environment setup fails.
+    log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
+    try:
+        env = SQLDebuggerEnvironment()
+        obs = env.reset(difficulty=difficulty, task_id=task_id)
+        for step in range(1, MAX_STEPS + 1):
+            if env.state().done:
+                break
+            # Get action from LLM
+            action     = get_llm_action(client, obs, step)
+            action_str = f"{action.action_type.value}"
+            error_str  = None
+            try:
+                resp   = env.step(action)
+                reward = resp.reward.score
+                done   = resp.done
+                obs    = resp.observation
+            except Exception as e:
+                # A rejected action costs a small penalty but does not end
+                # the episode; the previous observation is reused.
+                reward    = -0.1
+                done      = False
+                error_str = str(e)[:100]
+            rewards.append(reward)
+            steps = step
+            log_step(
+                step   = step,
+                action = action_str,
+                reward = reward,
+                done   = done,
+                error  = error_str
+            )
+            if done:
+                break
+        # Calculate score
+        total_reward = sum(rewards)
+        score        = min(max(total_reward / MAX_STEPS, 0.0), 1.0)
+        success      = score >= SUCCESS_SCORE_THRESHOLD
+    except Exception as e:
+        print(f"[DEBUG] Episode error: {e}", flush=True)
+    finally:
+        # Always close the episode record, whatever happened above.
+        log_end(
+            success = success,
+            steps   = steps,
+            score   = score,
+            rewards = rewards
+        )
+    return {
+        "task_id":    task_id,
+        "difficulty": difficulty,
+        "score":      score,
+        "steps":      steps,
+        "success":    success,
+    }
 def main():
+    """Run one episode per difficulty level and print a [DEBUG] summary."""
+    print(f"[DEBUG] API_BASE_URL={API_BASE_URL}", flush=True)
+    print(f"[DEBUG] MODEL_NAME={MODEL_NAME}", flush=True)
+    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
+    # (difficulty, task_id) pairs, run in this order.
+    tasks = (
+        ("easy",   "easy_001"),
+        ("medium", "medium_001"),
+        ("hard",   "hard_001"),
+    )
+    results = [run_episode(client, level, task) for level, task in tasks]
+    # Final summary
+    score_total = sum(entry["score"] for entry in results)
+    avg_score   = score_total / len(results)
+    print(f"\n[DEBUG] Average Score: {avg_score:.3f}", flush=True)
+    for r in results:
+        print(f"[DEBUG] {r['difficulty']:8} | {r['task_id']:12} | score={r['score']:.3f} | steps={r['steps']}", flush=True)
 if __name__ == "__main__":
     main()