abir-hr196 committed on
Commit
83fe832
·
1 Parent(s): e07e1fe

data_viewer v1

Browse files
Files changed (1) hide show
  1. tinysql_dataset_viewer.py +119 -128
tinysql_dataset_viewer.py CHANGED
@@ -1,153 +1,144 @@
1
- # tinysql_dataset_viewer.py
2
  import gradio as gr
3
  from datasets import load_dataset
4
  import pandas as pd
5
- import urllib.parse
6
- import html
7
- import traceback
8
 
9
# Display label -> dataset identifier handed to `load_dataset`.
HF_DATASETS = {
    "CS1": "withmartian/cs1_dataset",
    "CS2": "withmartian/cs2_dataset",
    "CS3": "withmartian/cs3_dataset",
    "CS2_synonyms": "withmartian/cs2_dataset_synonyms",
    "CS3_synonyms": "withmartian/cs3_dataset_synonyms",
    "CS4_synonyms": "withmartian/cs4_dataset_synonyms",
}

# Base URL of the model demo that a selected example is sent to.
DEMO_URL = "https://huggingface.co/spaces/abir-hr196/tinysql-demo"

# Default number of rows fetched for a preview.
PREVIEW_LIMIT = 500

# Dataset columns kept in every preview.
FIELDS = ["english_prompt", "create_statement", "sql_statement"]

# Previews that were already loaded, keyed by display label.
dataset_cache = {}
22
 
23
- # ---------------- Helpers ----------------
24
def load_preview(dataset_id, limit=PREVIEW_LIMIT):
    """Load up to ``limit`` rows of a dataset as a normalized DataFrame.

    The sliced ``train`` split is tried first. If that raises, the dataset is
    loaded without a split argument and the leading rows are selected from
    its first split, or from the returned object itself when it is not a
    mapping of splits.

    Args:
        dataset_id: Identifier passed to ``load_dataset``.
        limit: Maximum number of rows to keep.

    Returns:
        A DataFrame whose first column is ``example_index`` (the row
        position), followed by the ``FIELDS`` columns. Fields missing from
        the dataset are filled with empty strings.
    """
    try:
        rows = load_dataset(dataset_id, split=f"train[:{limit}]")
    except Exception:
        loaded = load_dataset(dataset_id)
        split_name = list(loaded.keys())[0] if isinstance(loaded, dict) else None
        source = loaded[split_name] if split_name else loaded
        rows = source.select(range(min(len(source), limit)))

    frame = pd.DataFrame(rows)
    for column in FIELDS:
        if column not in frame.columns:
            frame[column] = ""

    # reset_index() turns the row position into an "index" column, which is
    # then exposed under the name the callbacks look up.
    trimmed = frame[FIELDS].copy().reset_index()
    return trimmed.rename(columns={"index": "example_index"})
42
-
43
def get_dataset_preview(name):
    """Return the preview DataFrame for a dataset label, loading it once.

    Args:
        name: Key of ``HF_DATASETS``.

    Returns:
        The DataFrame stored in ``dataset_cache`` for ``name``; on a cache
        miss it is produced by ``load_preview`` and stored before returning.
    """
    try:
        return dataset_cache[name]
    except KeyError:
        pass

    preview = load_preview(HF_DATASETS[name])
    dataset_cache[name] = preview
    return preview
49
-
50
def make_dropdown_options(df):
    """Build ``(label, value)`` dropdown choices, one per row of ``df``.

    Args:
        df: DataFrame with ``example_index`` and ``english_prompt`` columns.

    Returns:
        A list of ``(label, example_index)`` tuples. Each label is the index
        followed by the whitespace-collapsed prompt, cut to 120 characters
        and suffixed with an ellipsis only when text was actually cut off.
    """
    max_chars = 120
    options = []
    for raw_index, raw_prompt in zip(df["example_index"], df["english_prompt"]):
        index = int(raw_index)
        # NaN is truthy, so `prompt or ""` would let it through and crash on
        # .split(); treat every missing value as an empty prompt instead.
        prompt = "" if pd.isna(raw_prompt) else str(raw_prompt)
        collapsed = " ".join(prompt.split())
        # Measure the collapsed text: the raw prompt may be longer than the
        # limit purely because of whitespace that is no longer in the label.
        suffix = "…" if len(collapsed) > max_chars else ""
        options.append((f"{index} — {collapsed[:max_chars]}{suffix}", index))
    return options
58
-
59
def filter_dataframe(df, query):
    """Keep the rows whose prompt or SQL statement contains ``query``.

    Matching is case-insensitive and literal: the query is plain text typed
    by the user, so characters such as ``(`` or ``*`` are not interpreted as
    regular-expression syntax.

    Args:
        df: DataFrame with ``english_prompt`` and ``sql_statement`` columns.
        query: Text to look for. A falsy value disables filtering.

    Returns:
        ``df`` itself when ``query`` is falsy; otherwise a new DataFrame with
        the matching rows and a fresh 0-based index.
    """
    if not query:
        return df
    needle = str(query).lower()

    def contains(column):
        # regex=False keeps SQL fragments like "count(" from raising re.error.
        return df[column].fillna("").str.lower().str.contains(needle, regex=False)

    mask = contains("english_prompt") | contains("sql_statement")
    return df[mask].reset_index(drop=True)
65
-
66
- # ---------------- Gradio callbacks ----------------
67
def on_dataset_change(dataset_name):
    """Gradio callback: show the preview of the newly selected dataset.

    Args:
        dataset_name: Key of ``HF_DATASETS`` chosen in the dropdown.

    Returns:
        A 3-tuple ``(table, dropdown_update, status)``. On success the status
        is an empty string and the dropdown selects the first example; on
        failure the table and choices are empty and the status carries the
        error message and traceback.
    """
    table_columns = ["example_index", "english_prompt", "sql_statement", "create_statement"]
    try:
        table = get_dataset_preview(dataset_name)[table_columns]
        choices = make_dropdown_options(table)
        first_value = choices[0][1] if choices else None
        return table, gr.Dropdown.update(choices=choices, value=first_value), ""
    except Exception as exc:
        trace = traceback.format_exc()
        empty_table = pd.DataFrame(
            [], columns=["id", "english_prompt", "sql_statement", "create_statement"]
        )
        cleared = gr.Dropdown.update(choices=[], value=None)
        return empty_table, cleared, f"Error loading dataset: {exc}\n{trace}"
76
 
77
def on_search(dataset_name, query):
    """Gradio callback: filter the current dataset preview by a search query.

    Args:
        dataset_name: Key of ``HF_DATASETS`` currently selected.
        query: Text from the search box; an empty value shows every row.

    Returns:
        A 2-tuple ``(table, dropdown_update)``. On failure both are empty.
    """
    try:
        matches = filter_dataframe(get_dataset_preview(dataset_name), query)
        table = matches[["example_index", "english_prompt", "sql_statement", "create_statement"]]
        choices = make_dropdown_options(table)
        first_value = choices[0][1] if choices else None
        return table, gr.Dropdown.update(choices=choices, value=first_value)
    except Exception:
        # This callback has no status output, so record the failure on stderr
        # instead of discarding it; the UI still falls back to an empty view.
        traceback.print_exc()
        empty_table = pd.DataFrame(
            [], columns=["id", "english_prompt", "sql_statement", "create_statement"]
        )
        return empty_table, gr.Dropdown.update(choices=[], value=None)
 
86
 
87
def send_to_demo(dataset_name, selected_index):
    """Gradio callback: build the HTML that opens the demo with one example.

    The selected example's prompt and schema are URL-encoded into the
    ``instruction`` and ``schema`` query parameters of ``DEMO_URL``.

    Args:
        dataset_name: Key of ``HF_DATASETS`` currently selected.
        selected_index: ``example_index`` of the chosen row, or ``None`` when
            nothing is selected.

    Returns:
        An escaped message string when no matching example exists; otherwise
        a ``gr.HTML`` update containing a popup script and a fallback link.
        Any exception is rendered as an escaped ``<pre>`` block.
    """
    import json  # local import: only used to emit a JavaScript string literal

    try:
        if selected_index is None:
            # Nothing selected (e.g. empty search result): int(None) would
            # otherwise surface as a raw traceback in the UI.
            return html.escape("Selected example not found.")
        df = get_dataset_preview(dataset_name)
        row = df[df["example_index"] == int(selected_index)]
        if row.empty:
            return html.escape("Selected example not found.")

        instr = str(row.iloc[0]["english_prompt"] or "")
        schema = str(row.iloc[0]["create_statement"] or "")
        q_instr = urllib.parse.quote_plus(instr)
        q_schema = urllib.parse.quote_plus(schema)
        url = f"{DEMO_URL}?instruction={q_instr}&schema={q_schema}"

        # Two contexts need two encodings: the href attribute is HTML-escaped,
        # while <script> content is not entity-decoded, so an escaped "&amp;"
        # there would corrupt the query string. Use a JS string literal.
        href_url = html.escape(url, quote=True)
        js_url = json.dumps(url)
        # NOTE(review): browsers do not execute <script> elements inserted via
        # innerHTML; if gr.HTML renders its value that way the popup never
        # opens and only the fallback link works. Confirm against the Gradio
        # version in use.
        html_out = f"""
        <script>
        window.open({js_url}, "_blank");
        </script>
        <div style="color: #E0E0E0; font-family: Inter, sans-serif;">
        Opened the demo in a new tab. If your browser blocked the popup, <a href="{href_url}" target="_blank" rel="noreferrer">click here</a>.
        </div>
        """
        return gr.HTML.update(value=html_out)
    except Exception as e:
        tb = traceback.format_exc()
        return gr.HTML.update(value=f"<pre style='color:#ffb3a7'>Error: {html.escape(str(e))}\n{html.escape(tb)}</pre>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- # ---------------- Dataset viewer function ----------------
113
def dataset_viewer():
    """Build the TinySQL dataset viewer UI.

    The layout has a control column (dataset variant, search box, example
    selector, demo button, status area) next to a preview table. Changing
    the dataset or the search text refreshes the table and the example
    selector; the button sends the selected example to the model demo.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    custom_css = """
    :root {
        --martian-orange: #FF6B4A;
        --martian-dark: #0E0E0E;
        --martian-gray-dark: #1A1A1A;
        --martian-gray-medium: #2A2A2A;
        --martian-gray-light: #3A3A3A;
        --martian-bg: #0E0E0E;
    }
    .gradio-container { background-color: var(--martian-bg) !important; font-family: 'Inter', sans-serif; }
    .header-section { text-align: center; padding: 2rem; background: linear-gradient(135deg, var(--martian-dark) 0%, var(--martian-gray-dark) 100%); border-radius: 12px; margin-bottom: 1rem; color:white;}
    .header-section h1 { font-size: 2rem; margin-bottom: 0.5rem; }
    .info-box { background: var(--martian-gray-dark); border-left: 4px solid var(--martian-orange); border-radius: 10px; padding:1rem; margin:1rem 0; color:#E0E0E0;}
    button, .gr-button { background: var(--martian-orange) !important; color:white !important; border:none !important;}
    .input-text, textarea, select, input, .gradio-dataframe { background: var(--martian-gray-medium) !important; border-color: var(--martian-gray-light) !important; color: #E0E0E0 !important; }
    a { color: var(--martian-orange) !important; }
    """

    with gr.Blocks(css=custom_css) as viewer:
        gr.HTML("""<div class="header-section"><h1>TinySQL — Dataset Viewer</h1><div class="subtitle">Browse dataset variants, filter examples, and send a selected example to the TinySQL model demo.</div></div>""")
        gr.HTML("""<div class="info-box"><strong>Note:</strong> Previews load the first 500 examples for fast exploration. Use the search box to filter prompts or SQL statements.</div>""")
        with gr.Row():
            with gr.Column(scale=1):
                dataset_dropdown = gr.Dropdown(choices=list(HF_DATASETS.keys()), value=list(HF_DATASETS.keys())[0], label="Dataset Variant")
                search_box = gr.Textbox(label="Search (prompt or SQL)", placeholder="Type keywords to filter prompts or SQL...")
                select_dropdown = gr.Dropdown(choices=[], label="Select example to try")
                try_button = gr.Button("Try in Model Demo", variant="primary")
                status_html = gr.HTML("")
            with gr.Column(scale=3):
                df_display = gr.Dataframe(
                    headers=["id", "english_prompt", "sql_statement", "create_statement"],
                    value=pd.DataFrame(columns=["id", "english_prompt", "sql_statement", "create_statement"]),
                    label="Preview (first 500 rows)"
                )

        dataset_dropdown.change(fn=on_dataset_change, inputs=[dataset_dropdown], outputs=[df_display, select_dropdown, status_html])
        search_box.change(fn=on_search, inputs=[dataset_dropdown, search_box], outputs=[df_display, select_dropdown])
        try_button.click(fn=send_to_demo, inputs=[dataset_dropdown, select_dropdown], outputs=[status_html])
        # `change` only fires once the user picks another variant, so also
        # populate the table and example list for the default selection when
        # the page loads.
        viewer.load(fn=on_dataset_change, inputs=[dataset_dropdown], outputs=[df_display, select_dropdown, status_html])

    return viewer
 
 
 
 
 
1
  import gradio as gr
2
  from datasets import load_dataset
3
  import pandas as pd
 
 
 
4
 
5
# Display label -> dataset identifier handed to `load_dataset`.
DATASETS = {
    "CS1": "withmartian/cs1_dataset",
    "CS2": "withmartian/cs2_dataset",
    "CS3": "withmartian/cs3_dataset",
    "CS2 Synonyms": "withmartian/cs2_dataset_synonyms",
    "CS3 Synonyms": "withmartian/cs3_dataset_synonyms",
    "CS4 Synonyms": "withmartian/cs4_dataset_synonyms",
}

# Dataset columns shown in the preview table, in display order.
COLUMNS = ["create_statement", "english_prompt", "sql_statement"]
 
 
17
 
18
def load_preview(dataset_name, limit=500):
    """Load the first rows of a dataset's ``train`` split for display.

    Args:
        dataset_name: Key of ``DATASETS``.
        limit: Maximum number of rows to return.

    Returns:
        A DataFrame holding the ``COLUMNS`` columns of at most ``limit``
        rows. If anything fails (unknown name, load error, missing column),
        a one-row DataFrame with a single ``Error`` column holding the
        message is returned instead, so the table can display it.
    """
    try:
        ds = load_dataset(DATASETS[dataset_name], split="train")
        # Trim on the Dataset first so only the preview rows are converted to
        # pandas, rather than building a frame of the full split and then
        # discarding everything past the head.
        head = ds.select(range(min(len(ds), limit)))
        return pd.DataFrame(head)[COLUMNS]
    except Exception as e:
        return pd.DataFrame({"Error": [str(e)]})
 
26
 
27
# Theme stylesheet passed to gr.Blocks: black background, orange accents.
custom_css = """
:root {
    --martian-orange: #FF6B4A;
    --martian-black: #0A0A0A;
    --martian-gray-dark: #1A1A1A;
    --martian-gray-medium: #2A2A2A;
    --martian-gray-light: #3A3A3A;
    --martian-bg: #0A0A0A;
}

.gradio-container {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
    background-color: var(--martian-bg) !important;
    color: #E0E0E0 !important;
}

.header-section {
    text-align: center;
    padding: 2rem 1rem;
    background: linear-gradient(135deg, var(--martian-gray-dark) 0%, var(--martian-gray-medium) 100%);
    border-radius: 12px;
    margin-bottom: 1.5rem;
    color: white;
}

.header-section h1 {
    font-size: 2rem;
    font-weight: 700;
    margin-bottom: 0.5rem;
}

.header-section .subtitle {
    font-size: 1rem;
    opacity: 0.85;
    line-height: 1.5;
}

.orange-accent {
    color: var(--martian-orange);
    font-weight: 600;
}

button.primary {
    background: var(--martian-orange) !important;
    border: none !important;
    color: white !important;
}

button.primary:hover {
    background: #FF5733 !important;
}

input, select, textarea {
    background: var(--martian-gray-medium) !important;
    border-color: var(--martian-gray-light) !important;
    color: #E0E0E0 !important;
}

.dataframe-container {
    background: var(--martian-gray-dark) !important;
    color: #E0E0E0 !important;
}
"""
91
 
 
92
def dataset_viewer():
    """Build the TinySQL dataset viewer UI.

    The layout has a dataset selector and a "Try in Model Demo" button next
    to a read-only preview table. The table is filled by ``load_preview``
    when the page loads and whenever another dataset is selected.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    # NOTE(review): `_js` and `max_rows` below are Gradio 3.x parameter names;
    # confirm the pinned Gradio version before upgrading.
    with gr.Blocks(css=custom_css, title="TinySQL Dataset Viewer") as viewer:
        # Header
        gr.HTML("""
        <div class="header-section">
            <h1>TinySQL Dataset Viewer</h1>
            <p class="subtitle">
                Browse dataset previews, search, and filter queries with <span class="orange-accent">ease</span>
            </p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=1):
                dataset_dropdown = gr.Dropdown(
                    choices=list(DATASETS.keys()),
                    value="CS1",
                    label="Select Dataset",
                )

                # Open the model demo in a new tab, purely client-side. The JS
                # callback is called with the values of the input components,
                # and there are none here, so the URL must live in the script
                # itself: a Python return value never reaches it.
                gr.Button(
                    "Try in Model Demo",
                    variant="primary",
                    elem_id="open_model_demo",
                ).click(
                    None,
                    None,
                    None,
                    _js="() => { window.open('https://huggingface.co/spaces/abir-hr196/tinysql-demo', '_blank'); }",
                )

            with gr.Column(scale=3):
                df_display = gr.Dataframe(
                    headers=COLUMNS,
                    datatype=["str", "str", "str"],
                    interactive=False,
                    label="Dataset Preview",
                    max_rows=20,
                )

        # Refresh the preview whenever another dataset is selected.
        dataset_dropdown.change(
            fn=load_preview,
            inputs=dataset_dropdown,
            outputs=df_display,
        )
        # `change` does not fire for the initial value, so also load the
        # default dataset's preview when the page opens.
        viewer.load(
            fn=load_preview,
            inputs=dataset_dropdown,
            outputs=df_display,
        )

    return viewer
142
+
143
# Script entry point: build the viewer and start serving it.
if __name__ == "__main__":
    app = dataset_viewer()
    app.launch()