Spaces:

RealMati
/

t2sql-demo

Sleeping

App Files Files Community

RealMati committed 15 days ago

Commit

f0e347c

verified ·

1 Parent(s): eec5488

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +310 -94

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 import gradio as gr
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
 MODEL_ID = "RealMati/t2sql_v6_structured"
@@ -13,9 +14,9 @@ model.eval()
 print("Model loaded.")
 AGG_OPS = ["", "MAX", "MIN", "COUNT", "SUM", "AVG"]
 OPS = ["=", ">", "<", ">=", "<=", "!="]
-# Load CSS from external file
 css_path = os.path.join(os.path.dirname(__file__), "style.css")
 with open(css_path, "r") as f:
     CSS = f.read()
@@ -90,7 +91,7 @@ def format_parsed(sel, agg, conds, columns):
     elif sel is not None:
         parts.append(f"Column index: {sel}")
     if agg is not None:
-        agg_label = AGG_OPS[agg] if agg < len(AGG_OPS) and agg > 0 else "None"
         parts.append(f"Aggregation: {agg_label}")
     if conds:
         cond_strs = []
@@ -98,23 +99,38 @@ def format_parsed(sel, agg, conds, columns):
             c_name = columns[c_idx] if c_idx < len(columns) else f"col{c_idx}"
             op_str = OPS[c_op] if c_op < len(OPS) else "="
             cond_strs.append(f"{c_name} {op_str} {c_val}")
-        parts.append(f"Conditions: {', '.join(cond_strs)}")
     else:
         parts.append("Conditions: None")
-    return " | ".join(parts)
 def predict(question, schema, num_beams, max_length):
-    if not question.strip():
-        return "", "", ""
     table_name, columns = parse_schema(schema)
     input_text = f"translate to SQL: {question}"
     if schema.strip():
         input_text += f" | schema: {schema.strip()}"
     inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
@@ -123,6 +139,7 @@ def predict(question, schema, num_beams, max_length):
             early_stopping=True,
             do_sample=False,
         )
     raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
     sel, agg, conds = decode_structured_output(raw_output)
@@ -130,10 +147,12 @@ def predict(question, schema, num_beams, max_length):
     if sel is not None and agg is not None and columns:
         sql = structured_to_sql(sel, agg, conds, columns, table_name)
     else:
-        sql = "(Provide a schema to convert structured output to SQL)"
-    parsed = format_parsed(sel, agg, conds, columns) if sel is not None else ""
-    return sql, raw_output, parsed
 theme = gr.themes.Soft(
@@ -144,107 +163,304 @@ theme = gr.themes.Soft(
     font_mono=gr.themes.GoogleFont("Fira Code"),
 )
-with gr.Blocks(title="Text-to-SQL Demo") as demo:
-    # Header
     gr.HTML("""
     <div class="main-header">
         <h1>Text-to-SQL</h1>
-        <p>Fine-tuned T5 model that converts natural language questions
-        into structured SQL queries using the WikiSQL dataset</p>
     </div>
     """)
-    # Pipeline visualization - dark background so text is always visible
     gr.HTML("""
-    <div class="pipeline-box">
-        <span class="stage">Natural Language</span>
-        <span class="arrow"> &rarr; </span>
-        <span class="stage">T5 Encoder</span>
-        <span class="arrow"> &rarr; </span>
-        <span class="highlight">Structured Tokens (SEL | AGG | CONDS)</span>
-        <span class="arrow"> &rarr; </span>
-        <span class="stage">SQL Query</span>
     </div>
     """)
-    with gr.Row(equal_height=True):
-        with gr.Column(scale=1):
-            gr.Markdown("### Input", elem_classes=["section-header"])
-            question = gr.Textbox(
-                label="Natural Language Question",
-                placeholder="e.g. What is terrence ross' nationality?",
-                lines=2,
-            )
-            schema = gr.Textbox(
-                label="Database Schema",
-                placeholder="table_name: col1, col2, col3, ...",
-                lines=2,
-                info="Format: table_name: column1, column2, column3",
-            )
-            with gr.Row():
-                beams = gr.Slider(
-                    minimum=1, maximum=10, value=5, step=1,
-                    label="Beam Size",
-                    info="Higher = better quality, slower",
-                )
-                max_len = gr.Slider(
-                    minimum=64, maximum=512, value=256, step=64,
-                    label="Max Length",
-                )
-            btn = gr.Button("Generate SQL", variant="primary", elem_classes=["generate-btn"])
-        with gr.Column(scale=1):
-            gr.Markdown("### Output", elem_classes=["section-header"])
-            sql_out = gr.Textbox(
-                label="Generated SQL",
-                lines=3,
-                elem_classes=["sql-output"],
             )
-            raw_out = gr.Textbox(
-                label="Raw Model Output (Structured Tokens)",
-                lines=1,
-                elem_classes=["raw-output"],
             )
-            parsed_out = gr.Textbox(
-                label="Decoded Components",
-                lines=1,
-                elem_classes=["raw-output"],
             )
-    btn.click(
-        fn=predict,
-        inputs=[question, schema, beams, max_len],
-        outputs=[sql_out, raw_out, parsed_out],
-    )
-    gr.Markdown("### Try These Examples", elem_classes=["section-header"])
-    gr.Examples(
-        examples=[
-            ["What is terrence ross' nationality", "players: Player, No., Nationality, Position, Years in Toronto, School/Club Team", 5, 256],
-            ["how many schools or teams had jalen rose", "players: Player, No., Nationality, Position, Years in Toronto, School/Club Team", 5, 256],
-            ["What was the date of the race in Misano?", "races: No, Date, Round, Circuit, Pole Position, Fastest Lap, Race winner, Report", 5, 256],
-            ["What was the number of race that Kevin Curtain won?", "races: No, Date, Round, Circuit, Pole Position, Fastest Lap, Race winner, Report", 5, 256],
-            ["Where was Assen held?", "races: No, Date, Round, Circuit, Pole Position, Fastest Lap, Race winner, Report", 5, 256],
-            ["How many different positions did Sherbrooke Faucons (qmjhl) provide in the draft?", "draft: Pick, Player, Position, Nationality, NHL team, College/junior/club team", 5, 256],
-            ["What are the nationalities of the player picked from Thunder Bay Flyers (ushl)", "draft: Pick, Player, Position, Nationality, NHL team, College/junior/club team", 5, 256],
-            ["How many different nationalities do the players of New Jersey Devils come from?", "draft: Pick, Player, Position, Nationality, NHL team, College/junior/club team", 5, 256],
-            ["What's Dorain Anneck's pick number?", "draft: Pick, Player, Position, Nationality, NHL team, College/junior/club team", 5, 256],
-        ],
-        inputs=[question, schema, beams, max_len],
-        outputs=[sql_out, raw_out, parsed_out],
-        fn=predict,
-        cache_examples=False,
-    )
     gr.HTML("""
-    <div class="footer-section">
-        <span class="info-badge">T5-base</span>&nbsp;
-        <span class="info-badge">WikiSQL</span>&nbsp;
-        <span class="info-badge">Seq2Seq</span>&nbsp;
-        <span class="info-badge">Structured Output</span>
-        <p style="margin-top:0.75rem;">
-            Model: <a href="https://huggingface.co/RealMati/t2sql_v6_structured" target="_blank">RealMati/t2sql_v6_structured</a>
-        </p>
     </div>
     """)

 import gradio as gr
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
+import time
 MODEL_ID = "RealMati/t2sql_v6_structured"
 print("Model loaded.")
 AGG_OPS = ["", "MAX", "MIN", "COUNT", "SUM", "AVG"]
+AGG_LABELS = ["None", "MAX", "MIN", "COUNT", "SUM", "AVG"]
 OPS = ["=", ">", "<", ">=", "<=", "!="]
 css_path = os.path.join(os.path.dirname(__file__), "style.css")
 with open(css_path, "r") as f:
     CSS = f.read()
     elif sel is not None:
         parts.append(f"Column index: {sel}")
     if agg is not None:
+        agg_label = AGG_LABELS[agg] if agg < len(AGG_LABELS) else str(agg)
         parts.append(f"Aggregation: {agg_label}")
     if conds:
         cond_strs = []
             c_name = columns[c_idx] if c_idx < len(columns) else f"col{c_idx}"
             op_str = OPS[c_op] if c_op < len(OPS) else "="
             cond_strs.append(f"{c_name} {op_str} {c_val}")
+        parts.append(f"Conditions: {' AND '.join(cond_strs)}")
     else:
         parts.append("Conditions: None")
+    return "  |  ".join(parts)
 def predict(question, schema, num_beams, max_length):
+    if not question or not question.strip():
+        return (
+            "-- Enter a question and schema above, then click Generate SQL",
+            "Waiting for input...",
+            "No query submitted yet",
+            "",
+        )
     table_name, columns = parse_schema(schema)
+    if not columns:
+        return (
+            "-- Please provide a database schema\n-- Format: table_name: col1, col2, col3",
+            "Cannot generate without schema",
+            "Schema is required to map column indices",
+            "",
+        )
     input_text = f"translate to SQL: {question}"
     if schema.strip():
         input_text += f" | schema: {schema.strip()}"
     inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
+    t0 = time.time()
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             early_stopping=True,
             do_sample=False,
         )
+    latency = time.time() - t0
     raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
     sel, agg, conds = decode_structured_output(raw_output)
     if sel is not None and agg is not None and columns:
         sql = structured_to_sql(sel, agg, conds, columns, table_name)
     else:
+        sql = f"-- Could not parse model output\n-- Raw: {raw_output}"
+    parsed = format_parsed(sel, agg, conds, columns) if sel is not None else "Parse failed"
+    latency_str = f"Inference: {latency:.2f}s  |  Beams: {int(num_beams)}  |  Input tokens: {inputs['input_ids'].shape[1]}"
+    return sql, raw_output, parsed, latency_str
 theme = gr.themes.Soft(
     font_mono=gr.themes.GoogleFont("Fira Code"),
 )
+with gr.Blocks(title="Text-to-SQL | T5 Fine-tuned on WikiSQL") as demo:
+    # ── Header ──
     gr.HTML("""
     <div class="main-header">
         <h1>Text-to-SQL</h1>
+        <p class="tagline">A fine-tuned T5 encoder-decoder that translates natural language
+        into structured SQL via learned column &amp; operator indices</p>
+        <a class="model-link" href="https://huggingface.co/RealMati/t2sql_v6_structured" target="_blank">
+            View Model on HuggingFace
+        </a>
     </div>
     """)
+    # ── Tech Badges ──
     gr.HTML("""
+    <div class="tech-badges">
+        <span class="badge badge-indigo">T5-base (220M params)</span>
+        <span class="badge badge-purple">Seq2Seq</span>
+        <span class="badge badge-emerald">WikiSQL (80K+ examples)</span>
+        <span class="badge badge-amber">Structured Output</span>
     </div>
     """)
+    # ── Pipeline Strip ──
+    gr.HTML("""
+    <div class="pipeline-strip">
+        <span class="step step-input">Natural Language</span>
+        <span class="arrow">&rarr;</span>
+        <span class="step step-model">T5 Encoder-Decoder</span>
+        <span class="arrow">&rarr;</span>
+        <span class="step step-struct">SEL | AGG | CONDS</span>
+        <span class="arrow">&rarr;</span>
+        <span class="step step-sql">Executable SQL</span>
+    </div>
+    """)
+    # ── Tabs ──
+    with gr.Tabs():
+        # ═══════════ TAB 1: DEMO ═══════════
+        with gr.Tab("Demo"):
+            with gr.Row(equal_height=False):
+                # Left column — inputs
+                with gr.Column(scale=1):
+                    gr.Markdown("#### Query Input")
+                    question = gr.Textbox(
+                        label="Natural Language Question",
+                        placeholder="e.g. What is terrence ross' nationality?",
+                        lines=2,
+                    )
+                    schema = gr.Textbox(
+                        label="Database Schema",
+                        placeholder="table_name: col1, col2, col3, ...",
+                        lines=2,
+                    )
+                    gr.HTML('<p class="input-hint">Format: <code>table_name: column1, column2, column3</code> &mdash; column order matters (maps to indices)</p>')
+                    with gr.Row():
+                        beams = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Beam Size")
+                        max_len = gr.Slider(minimum=64, maximum=512, value=256, step=64, label="Max Length")
+                    btn = gr.Button("Generate SQL", variant="primary", elem_classes=["generate-btn"], size="lg")
+                # Right column — outputs
+                with gr.Column(scale=1):
+                    gr.Markdown("#### Model Output")
+                    sql_out = gr.Textbox(
+                        label="Generated SQL",
+                        value="-- Enter a question and schema above, then click Generate SQL",
+                        lines=3,
+                        elem_classes=["sql-output"],
+                    )
+                    with gr.Row():
+                        raw_out = gr.Textbox(
+                            label="Raw Structured Tokens",
+                            value="Waiting for input...",
+                            lines=1,
+                            elem_classes=["decode-box"],
+                        )
+                    parsed_out = gr.Textbox(
+                        label="Decoded Mapping",
+                        value="No query submitted yet",
+                        lines=1,
+                        elem_classes=["decode-box"],
+                    )
+                    latency_out = gr.Textbox(
+                        label="Performance",
+                        value="",
+                        lines=1,
+                        elem_classes=["decode-box"],
+                    )
+            btn.click(
+                fn=predict,
+                inputs=[question, schema, beams, max_len],
+                outputs=[sql_out, raw_out, parsed_out, latency_out],
             )
+            question.submit(
+                fn=predict,
+                inputs=[question, schema, beams, max_len],
+                outputs=[sql_out, raw_out, parsed_out, latency_out],
             )
+            # ── Examples ──
+            gr.Markdown("#### Example Queries")
+            gr.Examples(
+                examples=[
+                    ["What is terrence ross' nationality", "players: Player, No., Nationality, Position, Years in Toronto, School/Club Team", 5, 256],
+                    ["how many schools or teams had jalen rose", "players: Player, No., Nationality, Position, Years in Toronto, School/Club Team", 5, 256],
+                    ["What was the date of the race in Misano?", "races: No, Date, Round, Circuit, Pole Position, Fastest Lap, Race winner, Report", 5, 256],
+                    ["What was the number of race that Kevin Curtain won?", "races: No, Date, Round, Circuit, Pole Position, Fastest Lap, Race winner, Report", 5, 256],
+                    ["Where was Assen held?", "races: No, Date, Round, Circuit, Pole Position, Fastest Lap, Race winner, Report", 5, 256],
+                    ["How many different positions did Sherbrooke Faucons (qmjhl) provide in the draft?", "draft: Pick, Player, Position, Nationality, NHL team, College/junior/club team", 5, 256],
+                    ["What are the nationalities of the player picked from Thunder Bay Flyers (ushl)", "draft: Pick, Player, Position, Nationality, NHL team, College/junior/club team", 5, 256],
+                    ["How many different nationalities do the players of New Jersey Devils come from?", "draft: Pick, Player, Position, Nationality, NHL team, College/junior/club team", 5, 256],
+                    ["What's Dorain Anneck's pick number?", "draft: Pick, Player, Position, Nationality, NHL team, College/junior/club team", 5, 256],
+                ],
+                inputs=[question, schema, beams, max_len],
+                outputs=[sql_out, raw_out, parsed_out, latency_out],
+                fn=predict,
+                cache_examples=False,
             )
+        # ═══════════ TAB 2: HOW IT WORKS ═══════════
+        with gr.Tab("How It Works"):
+            gr.HTML("""
+            <div class="arch-section">
+                <div class="arch-card">
+                    <h3>Architecture Overview</h3>
+                    <p>This system uses a <strong>T5-base</strong> (Text-to-Text Transfer Transformer) model
+                    fine-tuned on the <strong>WikiSQL</strong> dataset. Instead of generating raw SQL strings directly,
+                    the model outputs <em>structured tokens</em> that encode the query as column indices and operator codes.
+                    A deterministic decoder then maps these indices back to actual column names using the provided schema.</p>
+                </div>
+                <div class="arch-grid">
+                    <div class="arch-card">
+                        <h3>1. Input Encoding</h3>
+                        <p>The natural language question and database schema are concatenated into a single input string:</p>
+                        <p><code>translate to SQL: {question} | schema: {table}: {col1}, {col2}, ...</code></p>
+                        <p>The schema provides the column vocabulary. Column order is critical &mdash;
+                        the model references columns by their <strong>positional index</strong> (0-based).</p>
+                    </div>
+                    <div class="arch-card">
+                        <h3>2. T5 Generation</h3>
+                        <p>The encoder processes the full input sequence. The decoder then generates structured tokens
+                        using beam search (default: 5 beams) with early stopping.</p>
+                        <p>Output format: <code>SEL:{col_idx} | AGG:{agg_idx} | CONDS:{col},{op},{val};...</code></p>
+                    </div>
+                    <div class="arch-card">
+                        <h3>3. Structured Decoding</h3>
+                        <p>The raw token string is parsed into three components:</p>
+                        <ul style="margin:0.5rem 0; padding-left:1.2rem;">
+                            <li><strong>SEL</strong> &mdash; which column to SELECT (index into schema)</li>
+                            <li><strong>AGG</strong> &mdash; aggregation function (0=none, 1=MAX, 2=MIN, 3=COUNT, 4=SUM, 5=AVG)</li>
+                            <li><strong>CONDS</strong> &mdash; WHERE conditions as <code>col_idx,op_idx,value</code> tuples</li>
+                        </ul>
+                    </div>
+                    <div class="arch-card">
+                        <h3>4. SQL Assembly</h3>
+                        <p>Column indices are mapped back to actual column names from the schema. Operator indices
+                        are converted to SQL operators (=, >, <, >=, <=, !=). The components are assembled into
+                        a valid SQL query with proper quoting and escaping.</p>
+                    </div>
+                </div>
+                <div class="arch-card">
+                    <h3>Why Structured Output?</h3>
+                    <p>Generating SQL as structured indices rather than free-form text provides several advantages:</p>
+                    <ul style="margin:0.5rem 0; padding-left:1.2rem;">
+                        <li><strong>Schema-agnostic</strong> &mdash; The model learns query patterns, not specific column names.
+                        It generalizes across any table schema.</li>
+                        <li><strong>Syntactically valid</strong> &mdash; The deterministic decoder guarantees well-formed SQL.
+                        No risk of misspelled keywords or broken syntax.</li>
+                        <li><strong>Smaller output space</strong> &mdash; The model only needs to predict a few integers and condition values,
+                        reducing the search space and improving accuracy.</li>
+                        <li><strong>Interpretable</strong> &mdash; Each component (SEL, AGG, CONDS) can be inspected independently,
+                        making debugging and analysis straightforward.</li>
+                    </ul>
+                </div>
+                <div class="arch-card">
+                    <h3>Encoding Reference</h3>
+                    <table class="encoding-table">
+                        <tr>
+                            <th>Component</th>
+                            <th>Index</th>
+                            <th>Meaning</th>
+                        </tr>
+                        <tr><td rowspan="6"><strong>AGG</strong></td>
+                            <td class="mono">0</td><td>No aggregation (plain SELECT)</td></tr>
+                        <tr><td class="mono">1</td><td>MAX</td></tr>
+                        <tr><td class="mono">2</td><td>MIN</td></tr>
+                        <tr><td class="mono">3</td><td>COUNT</td></tr>
+                        <tr><td class="mono">4</td><td>SUM</td></tr>
+                        <tr><td class="mono">5</td><td>AVG</td></tr>
+                        <tr><td rowspan="6"><strong>OP</strong> (in CONDS)</td>
+                            <td class="mono">0</td><td>= (equals)</td></tr>
+                        <tr><td class="mono">1</td><td>> (greater than)</td></tr>
+                        <tr><td class="mono">2</td><td>< (less than)</td></tr>
+                        <tr><td class="mono">3</td><td>>= (greater or equal)</td></tr>
+                        <tr><td class="mono">4</td><td><= (less or equal)</td></tr>
+                        <tr><td class="mono">5</td><td>!= (not equal)</td></tr>
+                    </table>
+                </div>
+            </div>
+            """)
+        # ═══════════ TAB 3: MODEL INFO ═══════════
+        with gr.Tab("Model & Training"):
+            gr.HTML("""
+            <div class="arch-section">
+                <div class="stats-grid">
+                    <div class="stat-card">
+                        <div class="stat-value">220M</div>
+                        <div class="stat-label">Parameters</div>
+                    </div>
+                    <div class="stat-card">
+                        <div class="stat-value">80K+</div>
+                        <div class="stat-label">Training Examples</div>
+                    </div>
+                    <div class="stat-card">
+                        <div class="stat-value">T5-base</div>
+                        <div class="stat-label">Architecture</div>
+                    </div>
+                    <div class="stat-card">
+                        <div class="stat-value">WikiSQL</div>
+                        <div class="stat-label">Dataset</div>
+                    </div>
+                </div>
+                <div class="arch-grid">
+                    <div class="arch-card">
+                        <h3>Model Architecture</h3>
+                        <ul style="margin:0.5rem 0; padding-left:1.2rem;">
+                            <li><strong>Base model:</strong> T5-base (encoder-decoder transformer)</li>
+                            <li><strong>Tokenizer:</strong> SentencePiece (32K vocabulary)</li>
+                            <li><strong>Max input length:</strong> 512 tokens</li>
+                            <li><strong>Max output length:</strong> 256 tokens</li>
+                            <li><strong>Decoding:</strong> Beam search (default 5 beams)</li>
+                            <li><strong>Framework:</strong> HuggingFace Transformers + PyTorch</li>
+                        </ul>
+                    </div>
+                    <div class="arch-card">
+                        <h3>Training Details</h3>
+                        <ul style="margin:0.5rem 0; padding-left:1.2rem;">
+                            <li><strong>Dataset:</strong> WikiSQL (Zhong et al., 2017)</li>
+                            <li><strong>Train split:</strong> ~56,355 examples</li>
+                            <li><strong>Dev split:</strong> ~8,421 examples</li>
+                            <li><strong>Test split:</strong> ~15,878 examples</li>
+                            <li><strong>Output format:</strong> Structured tokens (SEL/AGG/CONDS)</li>
+                            <li><strong>Task prefix:</strong> <code>translate to SQL:</code></li>
+                        </ul>
+                    </div>
+                    <div class="arch-card">
+                        <h3>Dataset: WikiSQL</h3>
+                        <p>WikiSQL is a large-scale dataset of 80,654 hand-annotated SQL queries and natural language
+                        questions corresponding to 24,241 tables from Wikipedia. Each query operates on a single table
+                        and supports SELECT, aggregation (COUNT, SUM, MAX, MIN, AVG), and WHERE conditions
+                        with comparison operators.</p>
+                        <p style="margin-top:0.5rem;">
+                            <a href="https://github.com/salesforce/WikiSQL" target="_blank" style="color:#667eea;">
+                                github.com/salesforce/WikiSQL
+                            </a>
+                        </p>
+                    </div>
+                    <div class="arch-card">
+                        <h3>Limitations</h3>
+                        <ul style="margin:0.5rem 0; padding-left:1.2rem;">
+                            <li><strong>Single-table only</strong> &mdash; No JOINs or subqueries (WikiSQL constraint)</li>
+                            <li><strong>Fixed operators</strong> &mdash; Limited to =, >, <, >=, <=, != </li>
+                            <li><strong>No GROUP BY / ORDER BY</strong> &mdash; Not in the WikiSQL schema</li>
+                            <li><strong>AND-only conditions</strong> &mdash; Multiple conditions are joined with AND</li>
+                            <li><strong>Schema required</strong> &mdash; Column names and order must be provided</li>
+                        </ul>
+                    </div>
+                </div>
+            </div>
+            """)
+    # ── Footer ──
     gr.HTML("""
+    <div class="app-footer">
+        Built with <a href="https://huggingface.co/docs/transformers" target="_blank">Transformers</a>
+        &amp; <a href="https://gradio.app" target="_blank">Gradio</a>
+        &nbsp;&bull;&nbsp;
+        Model: <a href="https://huggingface.co/RealMati/t2sql_v6_structured" target="_blank">RealMati/t2sql_v6_structured</a>
+        &nbsp;&bull;&nbsp;
+        Dataset: <a href="https://github.com/salesforce/WikiSQL" target="_blank">WikiSQL</a>
     </div>
     """)