Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="utf-8" /> | |
| <meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover" /> | |
| <meta name="color-scheme" content="light" /> | |
| <meta name="theme-color" content="#f6f7fb" /> | |
| <meta name="description" content="SQL Debug OpenEnv: architecture, live /reset and /step playground, and training evidence. Hugging Face Space." /> | |
| <meta property="og:title" content="SQL Debug Environment — Space Demo" /> | |
| <meta property="og:description" content="OpenEnv-compliant SQL debugging environment with live rewards, GRPO training hooks, and reproducible artifacts." /> | |
| <meta property="og:type" content="website" /> | |
| <title>SQL Debug Environment · Hugging Face Space</title> | |
| <link rel="preconnect" href="https://fonts.googleapis.com" /> | |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin /> | |
| <link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700;1,9..40,500&family=Fraunces:ital,opsz,wght@0,9..144,500;0,9..144,600;0,9..144,700;1,9..144,500&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet" /> | |
| <style> | |
| :root { | |
| --space-bg: #f0f2f6; | |
| --space-bg-elevated: #fafbfe; | |
| --space-border: #e2e6ef; | |
| --space-border-strong: #cdd5e5; | |
| --ink: #111827; | |
| --ink-soft: #374151; | |
| --muted: #6b7280; | |
| --muted-light: #9ca3af; | |
| --card: #ffffff; | |
| --card-shadow: 0 1px 2px rgba(16, 24, 40, 0.04), 0 8px 28px rgba(16, 24, 40, 0.06); | |
| --card-shadow-hover: 0 1px 2px rgba(16, 24, 40, 0.06), 0 12px 36px rgba(16, 24, 40, 0.08); | |
| --hf-amber: #f59e0b; | |
| --hf-amber-soft: #fff7ed; | |
| --accent: #2563eb; | |
| --accent-soft: #eff6ff; | |
| --diagram-bg: #0c1222; | |
| --diagram-border: #1e293b; | |
| --radius: 14px; | |
| --radius-lg: 20px; | |
| --font: "DM Sans", system-ui, -apple-system, sans-serif; | |
| --font-display: "Fraunces", Georgia, serif; | |
| --font-mono: "JetBrains Mono", ui-monospace, monospace; | |
| --safe-top: env(safe-area-inset-top, 0px); | |
| --safe-bottom: env(safe-area-inset-bottom, 0px); | |
| } | |
| * { box-sizing: border-box; } | |
| /* Scroll-snapped anchor targets are offset 112px from the top of the viewport (presumably to clear the sticky banner and nav; confirm if their heights change). */ | |
| html { | |
| scroll-padding-top: 112px; | |
| } | |
| /* Animate in-page anchor jumps only when the user has not requested reduced motion. */ | |
| @media (prefers-reduced-motion: no-preference) { | |
| html { | |
| scroll-behavior: smooth; | |
| } | |
| } | |
| body { | |
| margin: 0; | |
| font-family: var(--font); | |
| color: var(--ink); | |
| background: var(--space-bg); | |
| min-height: 100vh; | |
| min-height: 100dvh; | |
| line-height: 1.55; | |
| -webkit-font-smoothing: antialiased; | |
| } | |
| a { color: var(--accent); } | |
| a:focus-visible, button:focus-visible, select:focus-visible, textarea:focus-visible { | |
| outline: 2px solid var(--accent); | |
| outline-offset: 2px; | |
| } | |
| .space-shell { | |
| min-height: 100vh; | |
| min-height: 100dvh; | |
| display: flex; | |
| flex-direction: column; | |
| } | |
| .space-banner { | |
| position: sticky; | |
| top: 0; | |
| z-index: 40; | |
| padding: calc(10px + var(--safe-top)) 16px 10px; | |
| background: linear-gradient(180deg, rgba(255,255,255,0.96) 0%, rgba(250,251,254,0.94) 100%); | |
| backdrop-filter: blur(12px); | |
| -webkit-backdrop-filter: blur(12px); | |
| border-bottom: 1px solid var(--space-border); | |
| box-shadow: 0 4px 24px rgba(15, 23, 42, 0.04); | |
| } | |
| .space-banner-inner { | |
| max-width: 1120px; | |
| margin: 0 auto; | |
| display: flex; | |
| flex-wrap: wrap; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 12px 20px; | |
| } | |
| .space-brand { | |
| display: flex; | |
| align-items: center; | |
| gap: 12px; | |
| flex: 1 1 auto; | |
| min-width: 0; | |
| } | |
| .space-logo { | |
| width: 38px; | |
| height: 38px; | |
| border-radius: 10px; | |
| background: linear-gradient(135deg, #fbbf24, #f59e0b); | |
| box-shadow: 0 2px 8px rgba(245, 158, 11, 0.35); | |
| flex-shrink: 0; | |
| } | |
| .space-brand h1 { | |
| margin: 0; | |
| font-family: var(--font-display); | |
| font-size: 1.05rem; | |
| font-weight: 600; | |
| letter-spacing: -0.02em; | |
| color: var(--ink); | |
| line-height: 1.2; | |
| } | |
| .space-brand p { | |
| margin: 2px 0 0; | |
| font-size: 0.75rem; | |
| color: var(--muted); | |
| font-weight: 500; | |
| } | |
| .space-actions { | |
| display: flex; | |
| flex-wrap: wrap; | |
| align-items: center; | |
| gap: 8px; | |
| } | |
| .btn-ghost { | |
| display: inline-flex; | |
| align-items: center; | |
| justify-content: center; | |
| gap: 6px; | |
| padding: 8px 14px; | |
| font-size: 0.8125rem; | |
| font-weight: 600; | |
| font-family: inherit; | |
| color: var(--ink-soft); | |
| background: var(--card); | |
| border: 1px solid var(--space-border-strong); | |
| border-radius: 999px; | |
| text-decoration: none; | |
| cursor: pointer; | |
| transition: border-color 0.15s, box-shadow 0.15s, background 0.15s; | |
| } | |
| .btn-ghost:hover { | |
| border-color: var(--muted-light); | |
| box-shadow: var(--card-shadow); | |
| } | |
| .btn-primary { | |
| display: inline-flex; | |
| align-items: center; | |
| justify-content: center; | |
| gap: 6px; | |
| padding: 8px 16px; | |
| font-size: 0.8125rem; | |
| font-weight: 700; | |
| font-family: inherit; | |
| color: #1c1917; | |
| background: linear-gradient(180deg, #fde68a, #fbbf24); | |
| border: 1px solid #d97706; | |
| border-radius: 999px; | |
| text-decoration: none; | |
| cursor: pointer; | |
| box-shadow: 0 1px 0 rgba(255,255,255,0.5) inset; | |
| transition: filter 0.15s, transform 0.1s; | |
| } | |
| .btn-primary:hover { filter: brightness(1.03); } | |
| .btn-primary:active { transform: scale(0.98); } | |
| .sticky-nav { | |
| position: sticky; | |
| top: calc(58px + var(--safe-top)); | |
| z-index: 30; | |
| margin: 0 auto; | |
| max-width: 1120px; | |
| padding: 0 16px 8px; | |
| } | |
| .sticky-nav-inner { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 6px; | |
| padding: 6px; | |
| background: var(--card); | |
| border: 1px solid var(--space-border); | |
| border-radius: 999px; | |
| box-shadow: var(--card-shadow); | |
| width: fit-content; | |
| max-width: 100%; | |
| } | |
| .sticky-nav a { | |
| padding: 8px 14px; | |
| font-size: 0.78rem; | |
| font-weight: 600; | |
| color: var(--muted); | |
| text-decoration: none; | |
| border-radius: 999px; | |
| transition: background 0.15s, color 0.15s; | |
| white-space: nowrap; | |
| } | |
| .sticky-nav a:hover { | |
| color: var(--ink); | |
| background: var(--space-bg); | |
| } | |
| .main { | |
| flex: 1; | |
| max-width: 1120px; | |
| margin: 0 auto; | |
| padding: 8px 16px calc(32px + var(--safe-bottom)); | |
| width: 100%; | |
| } | |
| .api-strip { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 8px; | |
| margin-bottom: 20px; | |
| } | |
| .api-chip { | |
| font-family: var(--font-mono); | |
| font-size: 0.68rem; | |
| font-weight: 500; | |
| padding: 5px 10px; | |
| border-radius: 8px; | |
| background: var(--card); | |
| border: 1px solid var(--space-border); | |
| color: var(--ink-soft); | |
| } | |
| .api-chip span { color: var(--muted); margin-right: 6px; } | |
| .section { | |
| margin-bottom: 28px; | |
| } | |
| .section-id { | |
| font-size: 0.65rem; | |
| font-weight: 700; | |
| letter-spacing: 0.18em; | |
| text-transform: uppercase; | |
| color: var(--hf-amber); | |
| margin-bottom: 8px; | |
| } | |
| .hero-title { | |
| font-family: var(--font-display); | |
| font-weight: 600; | |
| font-size: clamp(1.75rem, 4.2vw, 2.5rem); | |
| line-height: 1.12; | |
| margin: 0 0 12px; | |
| letter-spacing: -0.02em; | |
| color: var(--ink); | |
| } | |
| .hero-title em { | |
| font-style: italic; | |
| color: var(--accent); | |
| } | |
| .lede { | |
| max-width: 54ch; | |
| color: var(--muted); | |
| font-size: 1rem; | |
| margin: 0 0 18px; | |
| } | |
| .layer-strip { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 8px; | |
| margin-bottom: 20px; | |
| } | |
| .layer { | |
| font-size: 0.68rem; | |
| font-weight: 700; | |
| letter-spacing: 0.05em; | |
| text-transform: uppercase; | |
| padding: 6px 11px; | |
| border-radius: 8px; | |
| border: 1px solid var(--space-border); | |
| background: var(--space-bg-elevated); | |
| color: var(--muted); | |
| } | |
| .layer b { color: var(--ink); } | |
| .panel { | |
| background: var(--card); | |
| border: 1px solid var(--space-border); | |
| border-radius: var(--radius-lg); | |
| padding: 20px; | |
| box-shadow: var(--card-shadow); | |
| margin-bottom: 20px; | |
| transition: box-shadow 0.2s; | |
| } | |
| .panel:hover { box-shadow: var(--card-shadow-hover); } | |
| .panel-header { | |
| display: flex; | |
| flex-wrap: wrap; | |
| align-items: flex-start; | |
| justify-content: space-between; | |
| gap: 12px; | |
| margin-bottom: 14px; | |
| } | |
| .panel-header h2 { | |
| margin: 0; | |
| font-size: 1.1rem; | |
| font-weight: 700; | |
| color: var(--ink); | |
| } | |
| .panel-header .caption { | |
| margin: 0; | |
| font-size: 0.8125rem; | |
| color: var(--muted); | |
| max-width: 38ch; | |
| line-height: 1.45; | |
| } | |
| .diagram-wrap { | |
| border-radius: var(--radius); | |
| overflow: auto; | |
| -webkit-overflow-scrolling: touch; | |
| background: var(--diagram-bg); | |
| border: 1px solid var(--diagram-border); | |
| box-shadow: inset 0 1px 0 rgba(255,255,255,0.06); | |
| max-height: min(92vh, 1400px); | |
| } | |
| .diagram-wrap img { | |
| display: block; | |
| width: 100%; | |
| max-width: 100%; | |
| height: auto; | |
| max-height: none; | |
| object-fit: contain; | |
| object-position: center top; | |
| cursor: zoom-in; | |
| } | |
| img.sde-zoomable { | |
| cursor: zoom-in; | |
| transition: box-shadow 0.15s ease, transform 0.12s ease; | |
| } | |
| img.sde-zoomable:hover { | |
| box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.35); | |
| } | |
| .figure-footer { | |
| display: flex; | |
| flex-wrap: wrap; | |
| justify-content: space-between; | |
| gap: 10px; | |
| padding-top: 14px; | |
| margin-top: 4px; | |
| font-size: 0.75rem; | |
| color: var(--muted); | |
| } | |
| .legend { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 12px; | |
| } | |
| .legend span::before { | |
| content: ""; | |
| display: inline-block; | |
| width: 7px; | |
| height: 7px; | |
| border-radius: 2px; | |
| margin-right: 5px; | |
| vertical-align: middle; | |
| } | |
| .legend .l-api::before { background: #22c55e; } | |
| .legend .l-env::before { background: #a78bfa; } | |
| .legend .l-data::before { background: #fb923c; } | |
| .legend .l-train::before { background: #2dd4bf; } | |
| .badges { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 8px; | |
| } | |
| .badge { | |
| font-size: 0.65rem; | |
| font-weight: 700; | |
| letter-spacing: 0.05em; | |
| text-transform: uppercase; | |
| padding: 5px 10px; | |
| border-radius: 999px; | |
| border: 1px solid var(--space-border); | |
| color: var(--muted); | |
| background: var(--space-bg-elevated); | |
| } | |
| .section-head { | |
| margin-bottom: 14px; | |
| } | |
| .section-head h2 { | |
| margin: 0 0 6px; | |
| font-family: var(--font-display); | |
| font-size: 1.35rem; | |
| font-weight: 600; | |
| color: var(--ink); | |
| } | |
| .section-head p { | |
| margin: 0; | |
| color: var(--muted); | |
| font-size: 0.9375rem; | |
| } | |
| .grid { | |
| display: grid; | |
| gap: 16px; | |
| grid-template-columns: 1fr; | |
| } | |
| @media (min-width: 860px) { | |
| .grid.cols-2 { grid-template-columns: 1fr 1fr; } | |
| .grid.cols-12 { grid-template-columns: repeat(12, 1fr); } | |
| .span-4 { grid-column: span 4; } | |
| .span-8 { grid-column: span 8; } | |
| } | |
| .play-card { | |
| background: var(--card); | |
| border: 1px solid var(--space-border); | |
| border-radius: var(--radius-lg); | |
| padding: 20px; | |
| box-shadow: var(--card-shadow); | |
| } | |
| label { | |
| display: block; | |
| font-size: 0.7rem; | |
| font-weight: 700; | |
| letter-spacing: 0.07em; | |
| text-transform: uppercase; | |
| color: var(--muted); | |
| margin-top: 14px; | |
| margin-bottom: 6px; | |
| } | |
| label:first-of-type { margin-top: 0; } | |
| select, textarea { | |
| width: 100%; | |
| font-family: inherit; | |
| font-size: 0.9375rem; | |
| border-radius: 10px; | |
| border: 1px solid var(--space-border-strong); | |
| background: var(--space-bg-elevated); | |
| color: var(--ink); | |
| padding: 12px 14px; | |
| transition: border-color 0.15s, box-shadow 0.15s; | |
| } | |
| /* Focused form fields: accent border plus a soft 3px ring. */ | |
| /* The outline is made transparent instead of removed, so forced-colors / high-contrast modes (which drop box-shadow) still paint a focus indicator. */ | |
| select:focus, textarea:focus { | |
| outline: 2px solid transparent; | |
| border-color: var(--accent); | |
| box-shadow: 0 0 0 3px var(--accent-soft); | |
| } | |
| textarea { | |
| min-height: 140px; | |
| resize: vertical; | |
| font-family: var(--font-mono); | |
| font-size: 0.8125rem; | |
| line-height: 1.5; | |
| } | |
| /* Full-width gradient action button (used for the playground Reset / Submit buttons). */ | |
| .btn-action { | |
| margin-top: 12px; | |
| width: 100%; | |
| min-height: 46px; | |
| font-family: inherit; | |
| font-size: 0.9375rem; | |
| font-weight: 700; | |
| cursor: pointer; | |
| border-radius: 10px; | |
| border: none; | |
| color: #fff; | |
| background: linear-gradient(135deg, #2563eb, #4f46e5); | |
| box-shadow: 0 4px 14px rgba(37, 99, 235, 0.35); | |
| /* filter is listed because the :hover state changes brightness; without it the hover change snaps instead of easing. */ | |
| transition: opacity 0.15s, filter 0.15s, transform 0.1s; | |
| } | |
| .btn-action:hover:not(:disabled) { filter: brightness(1.05); } | |
| .btn-action:active:not(:disabled) { transform: scale(0.99); } | |
| .btn-action:disabled { | |
| opacity: 0.55; | |
| cursor: not-allowed; | |
| } | |
| .session-pill { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 8px; | |
| font-size: 0.75rem; | |
| color: var(--muted); | |
| margin-bottom: 10px; | |
| padding: 6px 12px; | |
| background: var(--accent-soft); | |
| border-radius: 999px; | |
| border: 1px solid #bfdbfe; | |
| } | |
| .session-pill strong { | |
| color: var(--accent); | |
| font-family: var(--font-mono); | |
| font-weight: 500; | |
| font-size: 0.72rem; | |
| } | |
| code.pre { | |
| display: block; | |
| white-space: pre-wrap; | |
| font-family: var(--font-mono); | |
| font-size: 0.72rem; | |
| line-height: 1.5; | |
| background: #f8fafc; | |
| border: 1px solid var(--space-border); | |
| border-radius: 10px; | |
| padding: 12px 14px; | |
| color: #1e293b; | |
| min-height: 72px; | |
| max-height: 260px; | |
| overflow: auto; | |
| } | |
| .proof-grid { | |
| display: grid; | |
| gap: 16px; | |
| grid-template-columns: 1fr; | |
| } | |
| @media (min-width: 720px) { | |
| .proof-grid { grid-template-columns: 1fr 1fr; } | |
| } | |
| .proof-card { | |
| border-radius: var(--radius); | |
| overflow: hidden; | |
| border: 1px solid var(--space-border); | |
| background: var(--card); | |
| box-shadow: var(--card-shadow); | |
| } | |
| .proof-card figcaption { | |
| padding: 10px 14px; | |
| font-size: 0.8125rem; | |
| color: var(--muted); | |
| border-top: 1px solid var(--space-border); | |
| background: var(--space-bg-elevated); | |
| } | |
| .proof-card img { | |
| display: block; | |
| width: 100%; | |
| height: auto; | |
| } | |
| .metric-table { | |
| width: 100%; | |
| border-collapse: collapse; | |
| font-size: 0.9375rem; | |
| } | |
| .metric-table th, | |
| .metric-table td { | |
| text-align: left; | |
| padding: 10px 14px; | |
| border-bottom: 1px solid var(--space-border); | |
| vertical-align: top; | |
| } | |
| .metric-table thead th { | |
| font-weight: 700; | |
| color: var(--ink); | |
| background: var(--space-bg-elevated); | |
| font-size: 0.75rem; | |
| letter-spacing: 0.06em; | |
| text-transform: uppercase; | |
| } | |
| .metric-table tbody td:first-child { | |
| color: var(--ink); | |
| max-width: 28ch; | |
| } | |
| .metric-table tbody td:last-child { | |
| font-weight: 700; | |
| font-variant-numeric: tabular-nums; | |
| color: var(--accent); | |
| } | |
| .metric-table tbody tr:last-child td { | |
| border-bottom: none; | |
| } | |
| .benchmark-chart-grid { | |
| display: grid; | |
| gap: 16px; | |
| grid-template-columns: 1fr; | |
| margin-top: 18px; | |
| } | |
| @media (min-width: 900px) { | |
| .benchmark-chart-grid { | |
| grid-template-columns: repeat(3, minmax(0, 1fr)); | |
| } | |
| } | |
| /* Full-screen image viewer (click any .sde-zoomable) */ | |
| .img-lightbox { | |
| position: fixed; | |
| inset: 0; | |
| z-index: 200; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| padding: calc(8px + env(safe-area-inset-top, 0px)) calc(8px + env(safe-area-inset-right, 0px)) calc(8px + env(safe-area-inset-bottom, 0px)) calc(8px + env(safe-area-inset-left, 0px)); | |
| box-sizing: border-box; | |
| } | |
| .img-lightbox[hidden] { | |
| display: none ; | |
| } | |
| .img-lightbox-backdrop { | |
| position: absolute; | |
| inset: 0; | |
| border: none; | |
| padding: 0; | |
| margin: 0; | |
| width: 100%; | |
| height: 100%; | |
| cursor: zoom-out; | |
| background: rgba(15, 23, 42, 0.9); | |
| backdrop-filter: blur(8px); | |
| -webkit-backdrop-filter: blur(8px); | |
| } | |
| .img-lightbox-panel { | |
| position: relative; | |
| z-index: 1; | |
| width: min(98vw, 1920px); | |
| max-height: min(96vh, 1200px); | |
| display: flex; | |
| flex-direction: column; | |
| background: #0b1220; | |
| border-radius: var(--radius-lg); | |
| border: 1px solid rgba(148, 163, 184, 0.35); | |
| box-shadow: 0 28px 90px rgba(0, 0, 0, 0.55); | |
| overflow: hidden; | |
| } | |
| .img-lightbox-close { | |
| position: absolute; | |
| top: 8px; | |
| right: 10px; | |
| z-index: 3; | |
| width: 40px; | |
| height: 40px; | |
| border: none; | |
| border-radius: 10px; | |
| font-size: 1.5rem; | |
| line-height: 1; | |
| cursor: pointer; | |
| color: #f8fafc; | |
| background: rgba(30, 41, 59, 0.95); | |
| box-shadow: 0 2px 10px rgba(0, 0, 0, 0.25); | |
| } | |
| .img-lightbox-close:hover { | |
| background: rgba(51, 65, 85, 0.98); | |
| } | |
| .img-lightbox-toolbar { | |
| position: absolute; | |
| top: 8px; | |
| left: 10px; | |
| z-index: 3; | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 6px; | |
| } | |
| .img-lightbox-toolbar button { | |
| min-width: 40px; | |
| height: 36px; | |
| padding: 0 10px; | |
| font-size: 0.9rem; | |
| font-weight: 700; | |
| font-family: inherit; | |
| border-radius: 8px; | |
| border: 1px solid rgba(148, 163, 184, 0.4); | |
| cursor: pointer; | |
| color: #e2e8f0; | |
| background: rgba(30, 41, 59, 0.95); | |
| } | |
| .img-lightbox-toolbar button:hover { | |
| background: rgba(51, 65, 85, 0.98); | |
| } | |
| .img-lightbox-hint { | |
| display: block; | |
| margin-top: 8px; | |
| font-size: 0.72rem; | |
| color: rgba(148, 163, 184, 0.95); | |
| line-height: 1.35; | |
| } | |
| .img-lightbox-scroll { | |
| flex: 1; | |
| min-height: 120px; | |
| overflow: auto; | |
| -webkit-overflow-scrolling: touch; | |
| padding: 52px 14px 44px; | |
| text-align: center; | |
| } | |
| .img-lightbox-stage { | |
| display: inline-block; | |
| margin: 0 auto; | |
| } | |
| .img-lightbox-scroll img { | |
| display: block; | |
| max-width: none; | |
| max-height: none; | |
| margin: 0 auto; | |
| vertical-align: middle; | |
| /* width/height set in JS so overflow scroll tracks zoom */ | |
| } | |
| .img-lightbox-caption { | |
| margin: 0; | |
| padding: 10px 14px 12px; | |
| font-size: 0.8125rem; | |
| color: #cbd5e1; | |
| background: rgba(15, 23, 42, 0.6); | |
| border-top: 1px solid rgba(148, 163, 184, 0.25); | |
| text-align: center; | |
| } | |
| .link-list a { | |
| color: var(--accent); | |
| text-decoration: none; | |
| font-weight: 600; | |
| display: block; | |
| padding: 10px 0; | |
| border-bottom: 1px solid var(--space-border); | |
| font-size: 0.9rem; | |
| } | |
| .link-list a:last-child { border-bottom: 0; } | |
| .link-list a:hover { text-decoration: underline; } | |
| .space-footer { | |
| margin-top: auto; | |
| padding: 20px 16px calc(16px + var(--safe-bottom)); | |
| border-top: 1px solid var(--space-border); | |
| background: linear-gradient(180deg, var(--space-bg-elevated), var(--space-bg)); | |
| } | |
| .space-footer-inner { | |
| max-width: 1120px; | |
| margin: 0 auto; | |
| display: flex; | |
| flex-wrap: wrap; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 12px; | |
| font-size: 0.8125rem; | |
| color: var(--muted); | |
| } | |
| .space-footer a { color: var(--muted); font-weight: 600; } | |
| .space-footer a:hover { color: var(--ink); } | |
| .blog-quote { | |
| border-left: 4px solid #2563eb; | |
| background: #eff6ff; | |
| color: #1e3a8a; | |
| padding: 10px 12px; | |
| border-radius: 8px; | |
| font-size: 0.9rem; | |
| margin: 0 0 12px; | |
| } | |
| .blog-mini-grid { | |
| display: grid; | |
| grid-template-columns: repeat(3, minmax(0, 1fr)); | |
| gap: 8px; | |
| margin: 0 0 12px; | |
| } | |
| .blog-mini { | |
| background: #f8fafc; | |
| border: 1px solid var(--space-border); | |
| border-radius: 10px; | |
| padding: 10px; | |
| font-size: 0.82rem; | |
| color: var(--muted); | |
| } | |
| .blog-mini b { color: var(--ink); display:block; font-size:0.98rem; margin-bottom: 2px; } | |
| @media (max-width: 900px) { | |
| .blog-mini-grid { grid-template-columns: 1fr; } | |
| } | |
| .lede-stack { | |
| max-width: 62ch; | |
| margin-bottom: 18px; | |
| } | |
| .lede-stack .lede { | |
| max-width: none; | |
| } | |
| .stat-callout { | |
| margin: 0 0 16px; | |
| padding: 14px 16px 16px; | |
| border-radius: var(--radius); | |
| border: 1px solid #c7d2fe; | |
| background: linear-gradient(135deg, #eef2ff 0%, #f8fafc 55%, #ecfeff 100%); | |
| box-shadow: 0 6px 22px rgba(37, 99, 235, 0.08); | |
| font-size: 0.98rem; | |
| line-height: 1.58; | |
| color: var(--ink-soft); | |
| } | |
| .stat-callout strong { | |
| color: var(--ink); | |
| font-weight: 700; | |
| } | |
| .blog-pull-wide { | |
| font-family: var(--font-display); | |
| font-size: 1.02rem; | |
| line-height: 1.45; | |
| color: var(--ink); | |
| margin: 18px 0 14px; | |
| padding: 12px 0 12px 16px; | |
| border-left: 4px solid var(--hf-amber); | |
| background: linear-gradient(90deg, var(--hf-amber-soft), transparent); | |
| border-radius: 0 10px 10px 0; | |
| } | |
| .blog-subhead { | |
| font-size: 0.72rem; | |
| font-weight: 800; | |
| letter-spacing: 0.12em; | |
| text-transform: uppercase; | |
| color: var(--muted); | |
| margin: 20px 0 8px; | |
| } | |
| .blog-list { | |
| margin: 0 0 14px 1.1rem; | |
| padding: 0; | |
| color: var(--muted); | |
| font-size: 0.9375rem; | |
| line-height: 1.55; | |
| } | |
| .blog-list li { margin-bottom: 8px; } | |
| .blog-footnote { | |
| font-size: 0.78rem; | |
| color: var(--muted-light); | |
| line-height: 1.45; | |
| margin: 10px 0 0; | |
| padding-top: 10px; | |
| border-top: 1px dashed var(--space-border); | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="space-shell"> | |
| <!-- Top banner (styled position: sticky by .space-banner): brand block on the left, action links on the right. --> | |
| <header class="space-banner"> | |
| <div class="space-banner-inner"> | |
| <div class="space-brand"> | |
| <!-- Purely decorative gradient tile; hidden from assistive technology. --> | |
| <div class="space-logo" aria-hidden="true"></div> | |
| <div> | |
| <h1>SQL Debug Environment</h1> | |
| <p>OpenEnv · FastAPI · Live SQL rewards</p> | |
| </div> | |
| </div> | |
| <div class="space-actions"> | |
| <!-- /gradio/ is a same-origin route; the remaining links open external sites in a new tab. --> | |
| <a class="btn-primary" href="/gradio/">Gradio UI</a> | |
| <a class="btn-ghost" href="https://github.com/mdayan8/sql-debug-env.git" target="_blank" rel="noopener">GitHub</a> | |
| <!-- NOTE(review): no handler for #btnOpenTab is visible in this part of the file; presumably bound by the page script. Confirm. --> | |
| <button type="button" class="btn-ghost" id="btnOpenTab" title="Opens this demo in a full browser tab">Open full page</button> | |
| <a class="btn-ghost" href="https://huggingface.co/spaces/md896/sql-debug-env" target="_blank" rel="noopener">Space on Hub ↗</a> | |
| </div> | |
| </div> | |
| </header> | |
| <!-- On-page navigation pill bar; each hash link targets a section id on this page, the last link goes to the /gradio/ route. --> | |
| <!-- NOTE(review): the #repro target is not visible in this part of the file; verify it exists further down. --> | |
| <nav class="sticky-nav" aria-label="On-page navigation"> | |
| <div class="sticky-nav-inner"> | |
| <a href="#environment">Environment</a> | |
| <a href="#first-training">First Training</a> | |
| <a href="#playground">Playground</a> | |
| <a href="#benchmark-visuals">Benchmark</a> | |
| <a href="#evidence">Evidence</a> | |
| <a href="#repro">Reproduce</a> | |
| <a href="/gradio/">Gradio</a> | |
| </div> | |
| </nav> | |
| <main class="main"> | |
| <!-- Read-only summary of the HTTP endpoints. Exposed as a labelled list so the aria-label sits on an element that has a role. --> | |
| <div class="api-strip" role="list" aria-label="Key API endpoints"> | |
| <span class="api-chip" role="listitem"><span>GET</span>/health</span> | |
| <span class="api-chip" role="listitem"><span>GET</span>/tasks</span> | |
| <span class="api-chip" role="listitem"><span>POST</span>/reset</span> | |
| <span class="api-chip" role="listitem"><span>POST</span>/step</span> | |
| <span class="api-chip" role="listitem"><span>POST</span>/step_with_review</span> | |
| <span class="api-chip" role="listitem"><span>GET</span>/benchmark</span> | |
| </div> | |
| <!-- Environment / architecture section: intro copy, layer strip, workflow diagram with legend, and technology badges. --> | |
| <section id="environment" class="section" aria-labelledby="env-title"> | |
| <p class="section-id">Space · Architecture</p> | |
| <h2 class="hero-title" id="env-title">Environment first — <em>how</em> the agent sees the world.</h2> | |
| <div class="lede-stack"> | |
| <!-- NOTE(review): the percentages quoted in the copy below carry no citation on this page; confirm sources before reuse. --> | |
| <p class="stat-callout"> | |
| <strong>Today, nearly 30% of a data team’s time is spent fixing SQL and pipeline logic</strong>—not building net-new insights, not shipping product features, | |
| but <em>debugging queries that already looked reasonable in a notebook or PR comment</em>. That tax shows up as rework, stale dashboards, and fragile “one-off” | |
| analyses that nobody trusts after the third incident. | |
| </p> | |
| <p class="lede"> | |
| <strong>Even with the most advanced AI models, the problem is not “solved.”</strong> | |
| On standard text-to-SQL benchmarks like Spider, headline numbers often sit in the <strong>high 80s to low 90s (%)</strong>—an impressive story for a slide deck. | |
| In real enterprise environments—drifting schemas, implicit business rules, join explosions, and permissioned views—that headline rarely survives contact with production. | |
| Teams routinely report effective success rates closer to the <strong>10–30%</strong> band unless the system closes the loop with <em>execution-grounded feedback</em> | |
| (run, observe error or result, attribute reward to what changed). | |
| </p> | |
| <p class="lede" style="margin-bottom:0"> | |
| This Space hosts the same HTTP API your trainer calls: <strong>sessions</strong>, <strong>typed observations</strong>, <strong>SQLite-backed tasks</strong>, and a | |
| <strong>decomposed reward</strong>. Below is the end-to-end workflow map at a glance; Engineering Notes connect the problem to the OpenEnv contract and the artifacts on this page. | |
| </p> | |
| </div> | |
| <!-- Visual summary of the layers; hidden from assistive technology via aria-hidden. --> | |
| <div class="layer-strip" aria-hidden="true"> | |
| <span class="layer"><b>Client</b> / agent</span> | |
| <span class="layer"><b>API</b> session + JSON</span> | |
| <span class="layer"><b>Env</b> SQLDebugEnv</span> | |
| <span class="layer"><b>Data</b> tasks + SQLite</span> | |
| <span class="layer"><b>Train</b> GRPO + artifacts</span> | |
| </div> | |
| <div class="panel"> | |
| <div class="panel-header"> | |
| <h2>Environment visualization</h2> | |
| <p class="caption">Runtime flow (solid) vs training and ops (dashed). Reviewer-guarded path optional for safer rollouts.</p> | |
| </div> | |
| <!-- .diagram-wrap scrolls (overflow: auto) on a dark background; the image is loaded eagerly and carries .sde-zoomable, the class the stylesheet associates with the full-screen viewer. --> | |
| <div class="diagram-wrap"> | |
| <img class="sde-zoomable" src="/static/diagram-end-to-end-workflow.png" alt="End-to-end workflow: Client, FastAPI, environment core, data and reward layer, training and deployment." width="1600" height="900" loading="eager" decoding="async" title="Click to open full-size viewer (zoom and pan)" /> | |
| </div> | |
| <div class="figure-footer"> | |
| <!-- Legend swatch colours come from the .legend .l-* ::before rules in the stylesheet. --> | |
| <div class="legend"> | |
| <span class="l-api">API</span> | |
| <span class="l-env">Env core</span> | |
| <span class="l-data">DB / tasks / reward</span> | |
| <span class="l-train">Training & Space</span> | |
| </div> | |
| <span>sql-debug-env workflow</span> | |
| </div> | |
| </div> | |
| <div class="badges"> | |
| <span class="badge">OpenEnv</span> | |
| <span class="badge">TRL · GRPO</span> | |
| <span class="badge">Live rewards</span> | |
| <span class="badge">Reviewer path</span> | |
| </div> | |
| </section> | |
| <!-- First-training section: external reference links (4 of 12 grid columns) beside a static code excerpt (8 of 12 columns) at widths of 860px and up. --> | |
| <section id="first-training" class="section" aria-labelledby="first-training-title"> | |
| <div class="section-head"> | |
| <p class="section-id">Training · First Context</p> | |
| <h2 id="first-training-title">Start with the first bridge run</h2> | |
| <p>This is the exact first training context you shared: dependency bootstrap, W&B tracking, then benchmark/eval steps.</p> | |
| </div> | |
| <div class="grid cols-12"> | |
| <div class="play-card span-4"> | |
| <!-- All links below leave the site and open in a new tab. --> | |
| <div class="link-list"> | |
| <a href="https://colab.research.google.com/drive/1H6SLfCBhHzRJtnymLgevjfyytWUximF5#scrollTo=j-9MptXvmPk8" target="_blank" rel="noopener">First training context (Colab anchor)</a> | |
| <a href="https://colab.research.google.com/drive/1H6SLfCBhHzRJtnymLgevjfyytWUximF5#scrollTo=x5YuvatGyyu_" target="_blank" rel="noopener">Full training notebook anchor</a> | |
| <a href="https://wandb.ai/mdayanbag-pesitm/sql-debug-grpo-best-budget/workspace?nw=nwusermdayanbag" target="_blank" rel="noopener">W&B workspace: sql-debug-grpo-best-budget</a> | |
| <a href="https://huggingface.co/spaces/md896/sql-debug-env/tree/main/artifacts/runs/20260426-064318-sample-rewards-32eval" target="_blank" rel="noopener">Sample rewards (32-eval) artifacts</a> | |
| <a href="https://huggingface.co/md896/sql-debug-agent-qwen25-05b-grpo-wandb-continue-v2" target="_blank" rel="noopener">Model card (winner)</a> | |
| </div> | |
| </div> | |
| <div class="play-card span-8"> | |
| <!-- NOTE(review): this <label> has no associated form control; it acts only as a styled caption for the code excerpt. --> | |
| <label>First training context code</label> | |
| <!-- Display-only Python excerpt; line breaks are preserved by the code.pre rule (white-space: pre-wrap). --> | |
| <code class="pre"># SQL Debug Env: FINAL REAL-WORLD BRIDGE
import os
print("Checking libraries...")
os.system("pip install trl accelerate wandb -U")
import httpx
import torch
import wandb
# W&B workspace: https://wandb.ai/mdayanbag-pesitm/sql-debug-grpo-best-budget/workspace?nw=nwusermdayanbag</code> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Playground: browser form for the /reset and /step endpoints. Input card on the left, output card on the right at widths of 860px and up. --> | |
| <!-- resetTask() and submitQuery() are not defined in this section; they are expected from the page script. --> | |
| <section id="playground" class="section" aria-labelledby="play-title"> | |
| <div class="section-head"> | |
| <p class="section-id">Live · Playground</p> | |
| <h2 id="play-title">Try <code style="font-family:var(--font-mono);font-size:0.85em;background:#f1f5f9;padding:2px 6px;border-radius:4px">/reset</code> and <code style="font-family:var(--font-mono);font-size:0.85em;background:#f1f5f9;padding:2px 6px;border-radius:4px">/step</code> from the browser</h2> | |
| <p>Use the same <strong>X-Session-Id</strong> header on every call (here: <code style="font-family:var(--font-mono);font-size:0.85em">demo-session</code>).</p> | |
| </div> | |
| <div class="grid cols-2"> | |
| <div class="play-card"> | |
| <!-- Controls take their accessible name from the visible <label for>; no aria-label override, so the spoken name matches the on-screen text. --> | |
| <label for="taskId">Task</label> | |
| <select id="taskId"> | |
| <option value="easy_syntax_fix">easy_syntax_fix</option> | |
| <option value="medium_logic_fix">medium_logic_fix</option> | |
| <option value="hard_multi_bug">hard_multi_bug</option> | |
| <option value="hard_finance_explosion">hard_finance_explosion</option> | |
| </select> | |
| <button type="button" class="btn-action" id="btnReset" onclick="resetTask()">Reset task</button> | |
| <label for="query">Candidate SQL</label> | |
| <textarea id="query" placeholder="SELECT ..."></textarea> | |
| <button type="button" class="btn-action" id="btnSubmit" onclick="submitQuery()">Submit query</button> | |
| </div> | |
| <div class="play-card"> | |
| <div class="session-pill">Session <strong>demo-session</strong></div> | |
| <!-- Output panels: presumably rewritten by the page script after each request, so they are polite live regions and updates get announced. --> | |
| <label>Task observation</label> | |
| <code id="observation" class="pre" aria-live="polite">Run “Reset task” to load the broken query and observation JSON.</code> | |
| <label style="margin-top:14px">Step result</label> | |
| <code id="result" class="pre" aria-live="polite">Submit a query to see reward, done, and info.</code> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Benchmark section: a static metric table followed by three chart figures (one column, three columns at widths of 900px and up). --> | |
| <section id="benchmark-visuals" class="section" aria-labelledby="benchmark-visuals-title"> | |
| <div class="section-head"> | |
| <p class="section-id">Evidence · Charts</p> | |
| <h2 id="benchmark-visuals-title">Benchmark visuals</h2> | |
| <p>Metric snapshot aligned with committed charts under <code style="font-family:var(--font-mono);font-size:0.85em;background:#f1f5f9;padding:2px 6px;border-radius:4px">server/static/</code> (same figures as the Gradio page).</p> | |
| </div> | |
| <div class="panel"> | |
| <!-- Values are hard-coded in the markup, not fetched; keep them in sync with the chart images by hand. --> | |
| <table class="metric-table" aria-label="Benchmark metric snapshot"> | |
| <thead> | |
| <tr> | |
| <th scope="col">Metric snapshot</th> | |
| <th scope="col">Value</th> | |
| </tr> | |
| </thead> | |
| <tbody> | |
| <tr> | |
| <td>Spider chart: Industry baseline</td> | |
| <td>48.2%</td> | |
| </tr> | |
| <tr> | |
| <td>Spider chart: Qwen-7B base</td> | |
| <td>52.4%</td> | |
| </tr> | |
| <tr> | |
| <td>Spider chart: RL agent</td> | |
| <td>78.5%</td> | |
| </tr> | |
| <tr> | |
| <td>Performance leap chart</td> | |
| <td>0.0% -> 25.0% (base to RL in that run view)</td> | |
| </tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| <!-- Each chart is lazy-loaded and carries .sde-zoomable, the class the stylesheet associates with the full-screen image viewer. --> | |
| <div class="benchmark-chart-grid"> | |
| <figure class="proof-card"> | |
| <img class="sde-zoomable" src="/static/chart-performance-leap.png" alt="Performance leap: baseline versus RL on a Spider-style headline view" width="900" height="520" loading="lazy" decoding="async" title="Click to open full-size viewer (zoom and pan)" /> | |
| <figcaption>Performance leap (Spider-style)</figcaption> | |
| </figure> | |
| <figure class="proof-card"> | |
| <img class="sde-zoomable" src="/static/chart-comparison-shift.png" alt="Comparison of models and reward distribution shift" width="900" height="520" loading="lazy" decoding="async" title="Click to open full-size viewer (zoom and pan)" /> | |
| <figcaption>Comparison + reward shift</figcaption> | |
| </figure> | |
| <figure class="proof-card"> | |
| <img class="sde-zoomable" src="/static/chart-spider-benchmark.png" alt="Spider-style benchmark headline chart across industry baseline, Qwen-7B base, and RL agent" width="900" height="520" loading="lazy" decoding="async" title="Click to open full-size viewer (zoom and pan)" /> | |
| <figcaption>Spider-style headline chart</figcaption> | |
| </figure> | |
| </div> | |
| </section> | |
| <section id="evidence" class="section" aria-labelledby="evidence-title"> | |
| <div class="section-head"> | |
| <p class="section-id">Evidence · Artifacts</p> | |
| <h2 id="evidence-title">Training plots from real runs</h2> | |
| <p>Regenerate with <code style="font-family:var(--font-mono);font-size:0.85em">presentation_graphs.py</code>; commit PNGs under <code style="font-family:var(--font-mono);font-size:0.85em">server/static/</code>.</p> | |
| </div> | |
| <div class="proof-grid"> | |
| <figure class="proof-card"> | |
| <img class="sde-zoomable" src="/static/proof-combo.png" alt="Presentation combo chart from training run" width="1200" height="800" loading="lazy" decoding="async" title="Click to open full-size viewer (zoom and pan)" /> | |
| <figcaption>Presentation combo — logged metrics.</figcaption> | |
| </figure> | |
| <figure class="proof-card"> | |
| <img class="sde-zoomable" src="/static/proof-distribution-shift.png" alt="Reward distribution shift" width="1200" height="800" loading="lazy" decoding="async" title="Click to open full-size viewer (zoom and pan)" /> | |
| <figcaption>Per-sample reward shift (baseline vs trained).</figcaption> | |
| </figure> | |
| </div> | |
| <div class="link-list" style="margin-top:12px"> | |
| <a href="/static/training_reward_curve_final.png" target="_blank" rel="noopener">training_reward_curve_final.png</a> | |
| <a href="/static/training_diagnostics_dual_axis_final.png" target="_blank" rel="noopener">training_diagnostics_dual_axis_final.png</a> | |
| <a href="/static/baseline_vs_trained_by_task_final.png" target="_blank" rel="noopener">baseline_vs_trained_by_task_final.png</a> | |
| <a href="/static/task_delta_post_minus_base_final.png" target="_blank" rel="noopener">task_delta_post_minus_base_final.png</a> | |
| <a href="/static/reward_distribution_shift_red_green_final.png" target="_blank" rel="noopener">reward_distribution_shift_red_green_final.png</a> | |
| <a href="/static/presentation_combo_final.png" target="_blank" rel="noopener">presentation_combo_final.png</a> | |
| <a href="/static/benchmark_style_summary_final.png" target="_blank" rel="noopener">benchmark_style_summary_final.png</a> | |
| <a href="/static/checkpoint_leaderboard_step_vs_reward_final.png" target="_blank" rel="noopener">checkpoint_leaderboard_step_vs_reward_final.png</a> | |
| <a href="/static/cost_vs_performance_final.png" target="_blank" rel="noopener">cost_vs_performance_final.png</a> | |
| </div> | |
| </section> | |
| <section id="repro" class="section"> | |
| <div class="grid cols-12"> | |
| <div class="play-card span-4"> | |
| <div class="section-head" style="margin-bottom:10px"> | |
| <p class="section-id">Reproduce</p> | |
| <h2 style="font-family:var(--font-display);font-size:1.15rem;margin:0;font-weight:600">Runs & assets</h2> | |
| </div> | |
| <div class="link-list"> | |
| <a href="https://colab.research.google.com/drive/1H6SLfCBhHzRJtnymLgevjfyytWUximF5#scrollTo=x5YuvatGyyu_" target="_blank" rel="noopener">Colab training notebook</a> | |
| <a href="https://huggingface.co/spaces/md896/sql-debug-env/tree/main/artifacts/runs/20260426-060502-final-pass-32eval" target="_blank" rel="noopener">Eval artifacts (32-run)</a> | |
| <a href="https://huggingface.co/md896/sql-debug-agent-qwen25-05b-grpo-wandb-continue-v2" target="_blank" rel="noopener">Model card</a> | |
| <a href="/benchmark" target="_blank" rel="noopener">Benchmark JSON</a> | |
| <a href="/health" target="_blank" rel="noopener">Health</a> | |
| </div> | |
| </div> | |
| <div class="play-card span-8"> | |
| <div class="section-head" style="margin-bottom:10px"> | |
| <p class="section-id">Engineering Notes</p> | |
| <h2 style="font-family:var(--font-display);font-size:1.15rem;margin:0;font-weight:600">Why I picked SQL debugging and why this architecture exists</h2> | |
| </div> | |
| <div class="blog-quote"> | |
| “The goal is not to generate beautiful SQL text. The goal is to produce SQL fixes that survive execution, repeatedly, under changing runtime conditions.” | |
| </div> | |
| <div class="blog-mini-grid"> | |
| <div class="blog-mini"><b>0.5B → 7B</b>Bridge run for wiring, then a stronger base model for SQL structure and joins.</div> | |
| <div class="blog-mini"><b>32-run eval</b>Artifact-backed pass with sample rewards and run logs you can diff, not vibes.</div> | |
| <div class="blog-mini"><b>Execution-first</b>Reward comes from running SQL against graded tasks—not from how persuasive the completion sounds.</div> | |
| </div> | |
| <div class="blog-mini-grid" style="margin-top:10px"> | |
| <div class="blog-mini"><b>Spider vs prod</b>Leaderboards reward clean splits; warehouses reward joins that do not explode under skew.</div> | |
| <div class="blog-mini"><b>GRPO loop</b>Group-relative updates turn execution outcomes into a stable training signal across sessions.</div> | |
| <div class="blog-mini"><b>Reviewer path</b>Optional guardrail so risky SQL is blocked without erasing every learning opportunity.</div> | |
| </div> | |
| <p class="blog-pull-wide"> | |
| If you only remember one tension from this page, remember this: <strong>high leaderboard accuracy is not the same thing as high production reliability.</strong> | |
| </p> | |
| <p style="color:var(--muted);margin:0 0 12px;font-size:0.9375rem"> | |
| The motive for this project was not to build another text-to-SQL demo. It was to shrink the gap between “model looks smart in a demo” and “model helps engineers ship.” | |
| SQL bugs are expensive because they fail late: a query can pass review, pass linting, and still break under real schema constraints, stale statistics, or join cardinality shifts. | |
| I picked this problem because it sits at the boundary between language modeling and systems engineering—if the agent improves here, it is learning runtime correctness, not cosmetic fluency. | |
| </p> | |
| <p class="blog-subhead">What leaderboards hide</p> | |
| <p style="color:var(--muted);margin:0 0 12px;font-size:0.9375rem"> | |
| Spider-style suites are useful scientific instruments: they keep comparisons honest and reproducible. They are also intentionally cleaner than most corporate warehouses. | |
| That is why you can simultaneously believe two facts that sound contradictory: models can score in the <strong>high 80s–90s (%)</strong> on canonical benchmarks while practitioners still describe | |
| <strong>10–30%</strong> “works first time in our environment” outcomes unless they invest in evaluation harnesses, guardrails, and iterative repair loops grounded in execution. | |
| </p> | |
| <ul class="blog-list"> | |
| <li><strong>Latency of truth.</strong> Text-only feedback arrives early; execution feedback arrives when the query meets the database. The latter is slower but decisive.</li> | |
| <li><strong>Credit assignment.</strong> Without runtime signal, you reward plausible prose. With it, you reward schema-correct joins, stable aggregates, and safe rewrites.</li> | |
| <li><strong>Operational drift.</strong> Production schemas evolve; a static snapshot benchmark cannot represent every enterprise edge case—so the training surface must be repeatable even when the world is messy.</li> | |
| </ul> | |
| <p class="blog-subhead">Why the OpenEnv-shaped API exists</p> | |
| <p style="color:var(--muted);margin:0 0 12px;font-size:0.9375rem"> | |
| The architecture follows an OpenEnv-style contract: | |
| <code>reset → observation</code> and <code>step(action) → observation, reward, done, info</code>. | |
| Each episode runs against isolated in-memory SQLite state, with deterministic task grading and execution-grounded rewards. That contract is what lets you compare runs, swap algorithms, | |
| and keep the same measurement tape: valid table references, stable aggregations, and join logic that does not collapse in edge cases. | |
| </p> | |
| <code class="pre">Conceptual reward: | |
| R_t = w_c*C_t + w_e*E_t + w_p*P_t + w_s*S_t - lambda*Penalty_t | |
| Objective: | |
| J(pi) = E_{tau ~ pi}[sum_{t=0..T} gamma^t * R_t]</code> | |
| <p style="color:var(--muted);margin:0 0 12px;font-size:0.9375rem"> | |
| The technical design makes debugging measurable. Session state exposes observations, action history, and reward trajectories. | |
| The reviewer-gated path adds risk control for unsafe submissions while preserving gradient signal (instead of hard-failing every risky step). | |
| That gives the policy consequences it can learn from: what failed, why it failed, and how far a candidate moved toward a valid fix. | |
| </p> | |
| <code class="pre">Data snapshot shown on this page: | |
| - Spider-style industry baseline: 48.2% | |
| - Qwen-7B base: 52.4% | |
| - RL agent headline: 78.5% | |
| - Performance leap view: 0.0% -> 25.0% | |
| - Hard evidence: 32-run eval + sample reward artifacts</code> | |
| <p style="color:var(--muted);margin:12px 0 12px;font-size:0.9375rem"> | |
| Traceability is a product decision, not a footnote. This page is an evidence chain: first training context, then live interaction, then artifact-backed plots. | |
| If a metric appears, it should map to concrete run folders, reward JSON files, and checkpoint lineage—so a reviewer can reconstruct the claim without trusting a single screenshot. | |
| </p> | |
| <p class="blog-subhead">How to read what ships here</p> | |
| <ul class="blog-list"> | |
| <li><strong>Environment diagram</strong> — the contract between client, API, env core, data layer, and training artifacts.</li> | |
| <li><strong>Playground</strong> — the same <code>/reset</code> and <code>/step</code> loop your trainer uses, in-browser, with explicit session headers.</li> | |
| <li><strong>Benchmark visuals + evidence PNGs</strong> — static exports committed under <code>server/static/</code>; regenerate from real run JSON when you change the story.</li> | |
| </ul> | |
| <p style="color:var(--muted);margin:0 0 12px;font-size:0.9375rem"> | |
| Industry and research converge on the same diagnosis: robust text-to-SQL needs context quality, intent handling, dialect robustness, and execution safeguards. | |
| Enterprise SQL debugging stays painful when feedback is detached from runtime behavior. The objective of this Space is to close that gap with a reproducible, | |
| execution-grounded learning loop you can fork, stress-test, and defend in a review. | |
| </p> | |
| <p class="blog-footnote"> | |
| Percent ranges (≈30% of time spent on debugging work; ≈10–30% production success vs high-80s/90s benchmark headlines) summarize common practitioner reporting and public benchmark narratives; | |
| your organization’s distributions will differ—treat them as motivation for measurement, not as universal constants. | |
| </p> | |
| <div class="link-list" style="margin-top:12px"> | |
| <a href="https://github.com/mdayan8/sql-debug-env.git" target="_blank" rel="noopener">GitHub — mdayan8/sql-debug-env</a> | |
| <a href="https://cloud.google.com/blog/products/databases/techniques-for-improving-text-to-sql" target="_blank" rel="noopener">Google Cloud: techniques for improving text-to-SQL</a> | |
| <a href="https://arxiv.org/abs/2601.18119" target="_blank" rel="noopener">OurBench / Squirrel: enterprise SQL debugging benchmark</a> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| </main> | |
| <footer class="space-footer"> | |
| <div class="space-footer-inner"> | |
| <span>Custom Space UI · FastAPI <code style="font-family:var(--font-mono);font-size:0.75em">/demo</code></span> | |
| <span> | |
| <a href="https://github.com/mdayan8/sql-debug-env.git" target="_blank" rel="noopener">GitHub</a> | |
| · | |
| <a href="https://huggingface.co/docs/hub/spaces" target="_blank" rel="noopener">Spaces docs</a> | |
| · | |
| <a href="https://huggingface.co/spaces/md896/sql-debug-env/tree/main" target="_blank" rel="noopener">Files & versions</a> | |
| </span> | |
| </div> | |
| </footer> | |
| </div> | |
| <div id="imgLightbox" class="img-lightbox" hidden role="dialog" aria-modal="true" aria-label="Full-size image viewer"> | |
| <button type="button" class="img-lightbox-backdrop" id="imgLightboxBackdrop" aria-label="Close viewer"></button> | |
| <div class="img-lightbox-panel"> | |
| <button type="button" class="img-lightbox-close" id="imgLightboxClose" aria-label="Close">×</button> | |
| <div class="img-lightbox-toolbar"> | |
| <button type="button" id="lbZoomOut" title="Zoom out" aria-label="Zoom out">−</button> | |
| <button type="button" id="lbZoomReset" title="Reset zoom" aria-label="Reset zoom">100%</button> | |
| <button type="button" id="lbZoomIn" title="Zoom in" aria-label="Zoom in">+</button> | |
| </div> | |
| <div class="img-lightbox-scroll" id="imgLightboxScroll"> | |
| <div class="img-lightbox-stage" id="imgLightboxStage"> | |
| <img id="imgLightboxImg" src="" alt="" decoding="async" /> | |
| </div> | |
| </div> | |
| <p class="img-lightbox-caption"> | |
| <span id="imgLightboxCaption"></span> | |
| <span class="img-lightbox-hint">Scroll to pan · use the +/− buttons or Ctrl+scroll to zoom · click the dark area or press Esc to close</span> | |
| </p> | |
| </div> | |
| </div> | |
| <script> | |
| (function () { | |
| var btn = document.getElementById("btnOpenTab"); | |
| if (btn) { | |
| btn.addEventListener("click", function () { | |
| try { | |
| window.open(window.location.href, "_blank", "noopener,noreferrer"); | |
| } catch (e) { | |
| window.location.href = window.location.href; | |
| } | |
| }); | |
| } | |
| })(); | |
| const sessionId = "demo-session"; | |
| function setLoading(which, on) { | |
| var el = document.getElementById(which); | |
| if (!el) return; | |
| el.disabled = on; | |
| if (on && !el.dataset.label) el.dataset.label = el.textContent; | |
| el.textContent = on ? "Please wait…" : (el.dataset.label || el.textContent); | |
| } | |
| async function resetTask() { | |
| setLoading("btnReset", true); | |
| try { | |
| const taskId = document.getElementById("taskId").value; | |
| const resp = await fetch("/reset", { | |
| method: "POST", | |
| headers: { | |
| "Content-Type": "application/json", | |
| "X-Session-Id": sessionId | |
| }, | |
| body: JSON.stringify({ task_id: taskId }) | |
| }); | |
| const data = await resp.json(); | |
| document.getElementById("observation").textContent = JSON.stringify(data, null, 2); | |
| const broken = data && data.observation && data.observation.original_query; | |
| document.getElementById("query").value = broken || ""; | |
| } finally { | |
| setLoading("btnReset", false); | |
| } | |
| } | |
| async function submitQuery() { | |
| setLoading("btnSubmit", true); | |
| try { | |
| const query = document.getElementById("query").value; | |
| const payload = { | |
| action: { | |
| action_type: "submit_query", | |
| query: query | |
| } | |
| }; | |
| const resp = await fetch("/step", { | |
| method: "POST", | |
| headers: { | |
| "Content-Type": "application/json", | |
| "X-Session-Id": sessionId | |
| }, | |
| body: JSON.stringify(payload) | |
| }); | |
| const data = await resp.json(); | |
| document.getElementById("result").textContent = JSON.stringify(data, null, 2); | |
| } finally { | |
| setLoading("btnSubmit", false); | |
| } | |
| } | |
| (function imageLightbox() { | |
| var main = document.querySelector("main"); | |
| var lb = document.getElementById("imgLightbox"); | |
| var stage = document.getElementById("imgLightboxStage"); | |
| var lbImg = document.getElementById("imgLightboxImg"); | |
| var cap = document.getElementById("imgLightboxCaption"); | |
| var scrollEl = document.getElementById("imgLightboxScroll"); | |
| var closeBtn = document.getElementById("imgLightboxClose"); | |
| var backdrop = document.getElementById("imgLightboxBackdrop"); | |
| var zIn = document.getElementById("lbZoomIn"); | |
| var zOut = document.getElementById("lbZoomOut"); | |
| var zReset = document.getElementById("lbZoomReset"); | |
| if (!main || !lb || !stage || !lbImg || !scrollEl) return; | |
| var scale = 1; | |
| function applyZoomedSize() { | |
| var nw = lbImg.naturalWidth; | |
| var nh = lbImg.naturalHeight; | |
| if (!nw || !nh) return; | |
| lbImg.style.width = nw * scale + "px"; | |
| lbImg.style.height = nh * scale + "px"; | |
| } | |
| function clearZoomedSize() { | |
| lbImg.style.width = ""; | |
| lbImg.style.height = ""; | |
| } | |
| function setScale(next) { | |
| scale = Math.min(4, Math.max(0.25, next)); | |
| applyZoomedSize(); | |
| } | |
| function centerScroll() { | |
| var el = scrollEl; | |
| el.scrollLeft = Math.max(0, (el.scrollWidth - el.clientWidth) / 2); | |
| el.scrollTop = Math.max(0, (el.scrollHeight - el.clientHeight) / 2); | |
| } | |
| function openFrom(thumb) { | |
| clearZoomedSize(); | |
| lbImg.removeAttribute("src"); | |
| scale = 1; | |
| lbImg.onload = function () { | |
| lbImg.onload = null; | |
| applyZoomedSize(); | |
| requestAnimationFrame(function () { | |
| centerScroll(); | |
| closeBtn.focus(); | |
| }); | |
| }; | |
| lbImg.src = thumb.currentSrc || thumb.src; | |
| lbImg.alt = thumb.getAttribute("alt") || ""; | |
| var fig = thumb.closest("figure"); | |
| var fc = fig && fig.querySelector("figcaption"); | |
| var capText = fc ? fc.textContent.replace(/\s+/g, " ").trim() : ""; | |
| if (!capText) { | |
| capText = (thumb.getAttribute("alt") || "Image").trim(); | |
| if (capText.length > 140) capText = capText.slice(0, 137) + "…"; | |
| } | |
| cap.textContent = capText; | |
| lb.hidden = false; | |
| lb.setAttribute("aria-hidden", "false"); | |
| document.body.style.overflow = "hidden"; | |
| if (lbImg.complete && lbImg.naturalWidth) { | |
| lbImg.onload(); | |
| } | |
| } | |
| function closeLb() { | |
| lb.hidden = true; | |
| lb.setAttribute("aria-hidden", "true"); | |
| lbImg.onload = null; | |
| lbImg.removeAttribute("src"); | |
| clearZoomedSize(); | |
| scale = 1; | |
| document.body.style.overflow = ""; | |
| } | |
| main.addEventListener("click", function (ev) { | |
| var t = ev.target; | |
| if (t && t.tagName === "IMG" && t.classList.contains("sde-zoomable")) { | |
| ev.preventDefault(); | |
| openFrom(t); | |
| } | |
| }); | |
| closeBtn.addEventListener("click", closeLb); | |
| backdrop.addEventListener("click", closeLb); | |
| zIn.addEventListener("click", function () { setScale(scale * 1.25); }); | |
| zOut.addEventListener("click", function () { setScale(scale / 1.25); }); | |
| zReset.addEventListener("click", function () { setScale(1); centerScroll(); }); | |
| lb.addEventListener("wheel", function (ev) { | |
| if (lb.hidden) return; | |
| if (!ev.ctrlKey && !ev.metaKey) return; | |
| ev.preventDefault(); | |
| setScale(scale * (ev.deltaY < 0 ? 1.1 : 0.9)); | |
| }, { passive: false }); | |
| document.addEventListener("keydown", function (ev) { | |
| if (lb.hidden) return; | |
| if (ev.key === "Escape") { | |
| ev.preventDefault(); | |
| closeLb(); | |
| } | |
| }); | |
| })(); | |
| </script> | |
| </body> | |
| </html> | |