"""Gradio app for previewing and searching the TinySQL datasets."""

import gradio as gr
from datasets import load_dataset
import pandas as pd

# Display name -> Hugging Face Hub dataset id.
DATASETS = {
    "CS1": "withmartian/cs1_dataset",
    "CS2": "withmartian/cs2_dataset",
    "CS3": "withmartian/cs3_dataset",
    "CS2 Synonyms": "withmartian/cs2_dataset_synonyms",
    "CS3 Synonyms": "withmartian/cs3_dataset_synonyms",
    "CS4 Synonyms": "withmartian/cs4_dataset_synonyms",
}

# Columns shown in the preview table, in display order.
COLUMNS = ["create_statement", "english_prompt", "sql_statement"]

# Maximum number of rows loaded into a preview.
PREVIEW_ROWS = 500


def load_preview(dataset_name: str, limit: int = PREVIEW_ROWS) -> pd.DataFrame:
    """Load the first ``limit`` rows of the selected dataset's train split.

    Args:
        dataset_name: A key of ``DATASETS``.
        limit: Maximum number of rows to return (defaults to ``PREVIEW_ROWS``).

    Returns:
        A DataFrame restricted to ``COLUMNS``. If anything fails (unknown
        name, load error, missing column) the exception is not raised;
        instead a one-row DataFrame with a single ``"Error"`` column holding
        the message is returned so the UI can display it.
    """
    try:
        ds = load_dataset(DATASETS[dataset_name], split="train")
        # Slice before converting so only the preview rows are materialised
        # in pandas rather than the whole split.
        return pd.DataFrame(ds[:limit])[COLUMNS]
    except Exception as e:  # UI boundary: surface the failure as data.
        return pd.DataFrame({"Error": [str(e)]})


def filter_dataframe(df: pd.DataFrame, search_query: str) -> pd.DataFrame:
    """Return the rows of ``df`` in which any column contains ``search_query``.

    The match is a case-insensitive *literal* substring test on the string
    form of every cell. ``df`` is returned unchanged when the query is empty,
    the frame is empty, or the frame is an error placeholder (it has an
    ``"Error"`` column).
    """
    if not search_query or df.empty or "Error" in df.columns:
        return df

    # regex=False: the query is free text typed by the user, so characters
    # such as "(" or "*" must be matched literally instead of being compiled
    # as a regular expression (which would raise on invalid patterns).
    matches = df.astype(str).apply(
        lambda column: column.str.contains(
            search_query, case=False, na=False, regex=False
        )
    )
    return df[matches.any(axis=1)]


# CSS styling
custom_css = """
:root {
    --martian-orange: #FF6B4A;
    --martian-black: #0A0A0A;
    --martian-gray-dark: #1A1A1A;
    --martian-gray-medium: #2A2A2A;
    --martian-gray-light: #3A3A3A;
}
.gradio-container {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
    background-color: var(--martian-black) !important;
    color: #E0E0E0 !important;
}
.header-section {
    text-align: center;
    padding: 2.5rem 1.5rem;
    background: linear-gradient(135deg, var(--martian-gray-dark) 0%, var(--martian-gray-medium) 100%);
    border-radius: 16px;
    margin-bottom: 2rem;
    color: white;
    box-shadow: 0 4px 6px rgba(0,0,0,0.3);
}
.header-section h1 {
    font-size: 2.2rem;
    font-weight: 700;
    margin-bottom: 0.75rem;
}
.header-section .subtitle {
    font-size: 1.1rem;
    opacity: 0.9;
    line-height: 1.6;
}
.orange-accent {
    color: var(--martian-orange);
    font-weight: 600;
}
.info-box {
    background: var(--martian-gray-dark);
    border-radius: 12px;
    padding: 1.5rem;
    margin: 1.5rem 0;
    border-left: 4px solid var(--martian-orange);
    color: #E0E0E0;
}
.dataset-guide {
    background: var(--martian-gray-dark);
    border-radius: 8px;
    padding: 1rem;
    margin-top: 1rem;
    font-size: 0.9rem;
    color: #D0D0D0;
}
button.primary {
    background: var(--martian-orange) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
}
button.primary:hover {
    background: #FF5733 !important;
    transform: translateY(-1px);
    box-shadow: 0 4px 8px rgba(255, 107, 74, 0.3);
}
input, select, textarea {
    background: var(--martian-gray-medium) !important;
    border-color: var(--martian-gray-light) !important;
    color: #E0E0E0 !important;
}
.dataframe {
    background: var(--martian-gray-dark) !important;
}
label {
    color: #D0D0D0 !important;
}
.label-wrap span {
    color: var(--martian-orange) !important;
}
"""


def dataset_viewer() -> gr.Blocks:
    """Build the dataset viewer UI and return it without launching.

    Layout: a header and info box, a left column with the dataset dropdown,
    complexity guide and buttons, and a right column with the search box,
    results table and a status line. The loaded preview is kept in a
    ``gr.State`` so searching always filters the full preview rather than
    the currently displayed subset.
    """
    with gr.Blocks(css=custom_css, title="TinySQL Dataset Viewer") as viewer:
        # Header
        # NOTE(review): custom_css defines .header-section / .subtitle /
        # .info-box / .dataset-guide, but the HTML fragments below contain
        # no markup as seen here — confirm the intended tags.
        gr.HTML("""

TinySQL Dataset Viewer

Browse dataset previews, search, and filter queries with ease

""")

        # Info box
        gr.HTML("""
Preview Mode: Showing first 500 rows of each dataset. Use search to filter results in real-time.
""")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Dataset Selection")
                dataset_dropdown = gr.Dropdown(
                    choices=list(DATASETS.keys()),
                    value="CS1",
                    label="Choose Dataset",
                    info="Select a dataset to preview",
                )
                gr.HTML("""
Complexity Levels:

CS1: Basic SELECT-FROM
CS2: Adds ORDER BY
CS3: Aggregations
CS4: Adds WHERE filters

Synonyms: Natural language variations
""")
                load_btn = gr.Button("Load Dataset", variant="primary", size="lg")
                # NOTE(review): this fragment is only whitespace as seen
                # here — presumably a spacer between the buttons; confirm.
                gr.HTML("\n")
                demo_btn = gr.Button("🚀 Try Model Demo", variant="primary")

            with gr.Column(scale=3):
                gr.Markdown("### Dataset Preview (First 500 Rows)")
                search_box = gr.Textbox(
                    label="Search",
                    placeholder="Search across all columns...",
                    lines=1,
                )
                df_display = gr.Dataframe(
                    headers=COLUMNS,
                    datatype=["str", "str", "str"],
                    interactive=False,
                    wrap=True,
                    max_rows=20,
                    label="Results",
                )
                stats_display = gr.Markdown("Click 'Load Dataset' to begin")

        # Store the loaded dataframe (unfiltered) between events.
        df_state = gr.State(value=pd.DataFrame())

        # Load dataset
        def load_and_display(dataset_name):
            """Load a preview; return (state, table, status markdown)."""
            df = load_preview(dataset_name)
            if "Error" in df.columns:
                return df, df, "❌ Error loading dataset"
            stats = f"**Loaded:** {len(df)} rows | **Columns:** {', '.join(COLUMNS)}"
            return df, df, stats

        load_btn.click(
            fn=load_and_display,
            inputs=dataset_dropdown,
            outputs=[df_state, df_display, stats_display],
        )

        # Search functionality
        def search_and_display(df, query):
            """Filter the stored preview; return (table, status markdown)."""
            if df.empty:
                return df, "Load a dataset first"
            if "Error" in df.columns:
                # The stored frame is the error placeholder from a failed
                # load; keep showing the error instead of a row count.
                return df, "❌ Error loading dataset"
            filtered_df = filter_dataframe(df, query)
            stats = f"**Showing:** {len(filtered_df)} of {len(df)} rows"
            if query:
                stats += f" | **Search:** '{query}'"
            return filtered_df, stats

        search_box.change(
            fn=search_and_display,
            inputs=[df_state, search_box],
            outputs=[df_display, stats_display],
        )

        # Open model demo in a new browser tab (client-side only).
        # NOTE(review): `_js` (and `max_rows` above) look like Gradio 3.x
        # keyword names — confirm against the pinned gradio version.
        demo_btn.click(
            lambda: None,
            None,
            None,
            _js="()=>{ window.open('https://huggingface.co/spaces/abir-hr196/tinysql-demo','_blank'); }",
        )

    return viewer


if __name__ == "__main__":
    dataset_viewer().launch()