# tinysql-demo / tinysql_dataset_viewer.py
# Source: Hugging Face Space file view (raw / history / blame).
# Last commit: "updates" (edf7bcc) by abir-hr196 - file size 7.2 kB.
import gradio as gr
from datasets import load_dataset
import pandas as pd
# Datasets to include.
# Maps the label shown in the dataset dropdown to the Hugging Face Hub
# repository id that is passed to `load_dataset`.
DATASETS = {
"CS1": "withmartian/cs1_dataset",
"CS2": "withmartian/cs2_dataset",
"CS3": "withmartian/cs3_dataset",
"CS2 Synonyms": "withmartian/cs2_dataset_synonyms",
"CS3 Synonyms": "withmartian/cs3_dataset_synonyms",
"CS4 Synonyms": "withmartian/cs4_dataset_synonyms",
}
# Columns kept from every dataset, in display order; also used as the
# headers of the results table.
COLUMNS = ["create_statement", "english_prompt", "sql_statement"]
def load_preview(dataset_name, max_rows=500):
    """Load the first rows of the selected dataset as a DataFrame.

    Args:
        dataset_name: A key of ``DATASETS`` (the label shown in the UI).
        max_rows: Maximum number of rows to return. Defaults to 500, the
            preview size used by the viewer.

    Returns:
        A DataFrame holding the ``COLUMNS`` columns of the first
        ``max_rows`` rows of the "train" split. If anything goes wrong
        (unknown name, download failure, missing column, ...) a one-row
        DataFrame with a single ``"Error"`` column carrying the message is
        returned instead of raising, so the UI can display it.
    """
    if dataset_name not in DATASETS:
        return pd.DataFrame({"Error": [f"Unknown dataset: {dataset_name!r}"]})
    try:
        ds = load_dataset(DATASETS[dataset_name], split="train")
        # Slice the dataset *before* converting it: building a DataFrame
        # from the whole split only to keep the head is needlessly slow
        # and memory hungry. Slicing yields a dict of column -> values.
        preview = ds[:max_rows]
        return pd.DataFrame(preview)[COLUMNS]
    except Exception as e:
        # Deliberate catch-all boundary: the caller renders the message.
        return pd.DataFrame({"Error": [str(e)]})
def filter_dataframe(df, search_query):
    """Return the rows of ``df`` that contain ``search_query`` in any column.

    The match is a case-insensitive *literal* substring test performed on
    the string form of every cell. The query is intentionally not treated
    as a regular expression: the data is SQL text, so users routinely type
    characters such as ``(``, ``*`` or ``.`` which would either raise
    ``re.error`` or match the wrong rows if interpreted as a pattern.

    Args:
        df: The DataFrame to filter.
        search_query: Text to look for. Empty or ``None`` means no filter.

    Returns:
        ``df`` itself when there is nothing to filter (empty query, empty
        frame, or an error frame identified by its ``"Error"`` column);
        otherwise the subset of rows with at least one matching cell.
    """
    if not search_query or df.empty or "Error" in df.columns:
        return df
    as_text = df.astype(str)
    # Vectorised per column, then reduced across columns for each row.
    hits = as_text.apply(
        lambda col: col.str.contains(
            search_query, case=False, na=False, regex=False
        )
    )
    return df[hits.any(axis=1)]
# CSS styling.
# Dark theme with an orange accent, passed to `gr.Blocks(css=...)`.
# The `:root` variables define the palette; the `.header-section`,
# `.info-box` and `.dataset-guide` classes are referenced by the raw HTML
# snippets rendered in `dataset_viewer`.
custom_css = """
:root {
--martian-orange: #FF6B4A;
--martian-black: #0A0A0A;
--martian-gray-dark: #1A1A1A;
--martian-gray-medium: #2A2A2A;
--martian-gray-light: #3A3A3A;
}
.gradio-container {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
background-color: var(--martian-black) !important;
color: #E0E0E0 !important;
}
.header-section {
text-align: center;
padding: 2.5rem 1.5rem;
background: linear-gradient(135deg, var(--martian-gray-dark) 0%, var(--martian-gray-medium) 100%);
border-radius: 16px;
margin-bottom: 2rem;
color: white;
box-shadow: 0 4px 6px rgba(0,0,0,0.3);
}
.header-section h1 {
font-size: 2.2rem;
font-weight: 700;
margin-bottom: 0.75rem;
}
.header-section .subtitle {
font-size: 1.1rem;
opacity: 0.9;
line-height: 1.6;
}
.orange-accent {
color: var(--martian-orange);
font-weight: 600;
}
.info-box {
background: var(--martian-gray-dark);
border-radius: 12px;
padding: 1.5rem;
margin: 1.5rem 0;
border-left: 4px solid var(--martian-orange);
color: #E0E0E0;
}
.dataset-guide {
background: var(--martian-gray-dark);
border-radius: 8px;
padding: 1rem;
margin-top: 1rem;
font-size: 0.9rem;
color: #D0D0D0;
}
button.primary {
background: var(--martian-orange) !important;
border: none !important;
color: white !important;
font-weight: 600 !important;
}
button.primary:hover {
background: #FF5733 !important;
transform: translateY(-1px);
box-shadow: 0 4px 8px rgba(255, 107, 74, 0.3);
}
input, select, textarea {
background: var(--martian-gray-medium) !important;
border-color: var(--martian-gray-light) !important;
color: #E0E0E0 !important;
}
.dataframe {
background: var(--martian-gray-dark) !important;
}
label {
color: #D0D0D0 !important;
}
.label-wrap span {
color: var(--martian-orange) !important;
}
"""
def dataset_viewer():
    """Build the TinySQL dataset viewer UI.

    The page consists of a header, an info banner, a left column (dataset
    dropdown, complexity guide, "Load Dataset" and "Try Model Demo"
    buttons) and a right column (search box, results table, status line).

    The loaded preview is kept in a ``gr.State`` so that searching filters
    the already-loaded rows instead of reloading the dataset.

    Returns:
        The assembled ``gr.Blocks`` app; call ``.launch()`` on it to serve.
    """
    # Static HTML snippets, styled by the classes declared in `custom_css`.
    header_html = (
        "\n"
        '<div class="header-section">\n'
        "<h1>TinySQL Dataset Viewer</h1>\n"
        '<p class="subtitle">\n'
        "Browse dataset previews, search, and filter queries with "
        '<span class="orange-accent">ease</span>\n'
        "</p>\n"
        "</div>\n"
    )
    info_html = (
        "\n"
        '<div class="info-box">\n'
        "<strong>Preview Mode:</strong> Showing first 500 rows of each "
        "dataset. Use search to filter results in real-time.\n"
        "</div>\n"
    )
    guide_html = (
        "\n"
        '<div class="dataset-guide">\n'
        "<strong>Complexity Levels:</strong><br><br>\n"
        "<strong>CS1:</strong> Basic SELECT-FROM<br>\n"
        "<strong>CS2:</strong> Adds ORDER BY<br>\n"
        "<strong>CS3:</strong> Aggregations<br>\n"
        "<strong>CS4:</strong> Adds WHERE filters<br><br>\n"
        "<strong>Synonyms:</strong> Natural language variations\n"
        "</div>\n"
    )
    error_status = "❌ Error loading dataset"

    def load_and_display(dataset_name):
        """Load a preview; return (state, table, status) for the UI."""
        df = load_preview(dataset_name)
        if "Error" in df.columns:
            return df, df, error_status
        stats = f"**Loaded:** {len(df)} rows | **Columns:** {', '.join(COLUMNS)}"
        return df, df, stats

    def search_and_display(df, query):
        """Filter the stored preview; return (table, status) for the UI."""
        if df.empty:
            return df, "Load a dataset first"
        if "Error" in df.columns:
            # The stored frame is the error report of a failed load; keep
            # showing it rather than reporting it as "1 of 1 rows".
            return df, error_status
        filtered_df = filter_dataframe(df, query)
        stats = f"**Showing:** {len(filtered_df)} of {len(df)} rows"
        if query:
            stats += f" | **Search:** '{query}'"
        return filtered_df, stats

    with gr.Blocks(css=custom_css, title="TinySQL Dataset Viewer") as viewer:
        # Header
        gr.HTML(header_html)
        # Info box
        gr.HTML(info_html)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Dataset Selection")
                dataset_dropdown = gr.Dropdown(
                    choices=list(DATASETS.keys()),
                    value="CS1",
                    label="Choose Dataset",
                    info="Select a dataset to preview",
                )
                gr.HTML(guide_html)
                load_btn = gr.Button("Load Dataset", variant="primary", size="lg")
                gr.HTML("<br>")
                # Rocket emoji written as an escape so the label survives
                # any re-encoding of this source file.
                demo_btn = gr.Button("\U0001F680 Try Model Demo", variant="primary")

            with gr.Column(scale=3):
                gr.Markdown("### Dataset Preview (First 500 Rows)")
                search_box = gr.Textbox(
                    label="Search",
                    placeholder="Search across all columns...",
                    lines=1,
                )
                df_display = gr.Dataframe(
                    headers=COLUMNS,
                    datatype=["str", "str", "str"],
                    interactive=False,
                    wrap=True,
                    label="Results",
                    max_height=600,  # cap the visible area; extra rows scroll
                )
                stats_display = gr.Markdown("Click 'Load Dataset' to begin")

        # Store the loaded dataframe so searches do not reload the dataset.
        df_state = gr.State(value=pd.DataFrame())

        # Load dataset
        load_btn.click(
            fn=load_and_display,
            inputs=dataset_dropdown,
            outputs=[df_state, df_display, stats_display],
        )

        # Search functionality: re-filter on every change of the text box.
        search_box.change(
            fn=search_and_display,
            inputs=[df_state, search_box],
            outputs=[df_display, stats_display],
        )

        # Open model demo in a new browser tab. This is purely client-side,
        # so no Python callback is registered. The keyword is `js`; `_js`
        # was the spelling used before Gradio 4 and is rejected by releases
        # that provide `Dataframe(max_height=...)`.
        demo_btn.click(
            fn=None,
            inputs=None,
            outputs=None,
            js="()=>{ window.open('https://huggingface.co/spaces/abir-hr196/tinysql-demo','_blank'); }",
        )

    return viewer
if __name__ == "__main__":
    # Script entry point: build the viewer and serve it with default settings.
    app = dataset_viewer()
    app.launch()