File size: 9,851 Bytes
e07e1fe
 
 
 
83fe832
e07e1fe
 
 
83fe832
 
 
e07e1fe
 
83fe832
e07e1fe
83fe832
e07e1fe
83fe832
f35a40c
 
 
e07e1fe
 
83fe832
e07e1fe
cb9c774
 
 
 
 
 
 
 
 
 
f35a40c
 
0e1846b
599e176
0e1846b
 
f35a40c
 
 
 
 
599e176
 
 
 
f35a40c
 
 
 
 
599e176
0e1846b
f35a40c
 
 
 
0e1846b
f35a40c
cb9c774
599e176
f35a40c
599e176
 
 
 
 
 
 
 
 
0e1846b
599e176
 
 
 
 
 
 
0e1846b
599e176
 
 
 
 
 
0e1846b
599e176
 
 
 
0e1846b
f35a40c
 
 
599e176
f35a40c
599e176
f35a40c
0e1846b
f35a40c
 
 
0e1846b
f35a40c
 
599e176
cb9c774
f35a40c
599e176
f35a40c
 
599e176
f35a40c
599e176
f35a40c
cb9c774
599e176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f35a40c
 
 
 
 
0e1846b
 
f35a40c
 
599e176
f35a40c
 
 
 
 
 
599e176
 
f35a40c
 
 
 
 
 
 
 
 
 
599e176
cb9c774
f35a40c
599e176
f35a40c
599e176
f35a40c
 
 
 
 
 
 
 
 
 
599e176
f35a40c
 
 
 
cb9c774
599e176
cb9c774
f35a40c
 
 
 
 
 
0e1846b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import gradio as gr
from datasets import load_dataset
import pandas as pd

# (dropdown label, Hugging Face Hub repository id) pairs. The order here is
# the order in which the labels are offered in the "Choose Dataset" dropdown.
_DATASET_REPOS = (
    ("CS1", "withmartian/cs1_dataset"),
    ("CS2", "withmartian/cs2_dataset"),
    ("CS3", "withmartian/cs3_dataset"),
    ("CS2 Synonyms", "withmartian/cs2_dataset_synonyms"),
    ("CS3 Synonyms", "withmartian/cs3_dataset_synonyms"),
    ("CS4 Synonyms", "withmartian/cs4_dataset_synonyms"),
)

# Label -> repository id lookup used when loading a preview.
DATASETS = dict(_DATASET_REPOS)

# Columns kept (in this order) for the preview table when a dataset has them all.
COLUMNS = [
    "create_statement",
    "english_prompt",
    "sql_statement",
]

def load_preview(dataset_name, max_rows=500):
    """Load the first rows of a dataset's train split as a DataFrame.

    Args:
        dataset_name: Key into ``DATASETS`` selecting the repository to load.
        max_rows: Maximum number of rows to keep in the preview.

    Returns:
        A DataFrame with at most ``max_rows`` rows. When every name in
        ``COLUMNS`` is present, only those columns are kept, in that order;
        otherwise all columns are returned. If loading fails for any reason
        (including an unknown ``dataset_name``), a one-row DataFrame with a
        single ``"Error"`` column holding the exception message is returned
        instead of raising.
    """
    try:
        ds = load_dataset(DATASETS[dataset_name], split="train")
        # Slice the Dataset before converting so only the preview rows are
        # materialised in pandas, not the entire split.
        df = pd.DataFrame(ds[:max_rows])
    except Exception as e:
        # Deliberately broad: the result feeds a UI table, so any failure is
        # reported as data rather than propagated to the event handler.
        return pd.DataFrame({"Error": [str(e)]})

    if all(col in df.columns for col in COLUMNS):
        df = df[COLUMNS]
    return df

def filter_dataframe(df, search_query):
    """Return the rows of ``df`` that contain ``search_query`` in any column.

    Matching is a case-insensitive literal substring test performed on the
    string form of every cell.

    Args:
        df: DataFrame to filter.
        search_query: Text to look for. A falsy value disables filtering.

    Returns:
        ``df`` itself when there is no query, the frame is empty, or it is an
        error frame (has an ``"Error"`` column); otherwise the subset of rows
        with at least one matching cell.
    """
    if not search_query or df.empty or "Error" in df.columns:
        return df

    # regex=False: the query is user-typed text, and SQL fragments such as
    # "COUNT(" are not valid regular expressions (and "." / "*" would match
    # more than intended).
    cell_hits = df.astype(str).apply(
        lambda col: col.str.contains(
            search_query, case=False, na=False, regex=False
        )
    )
    return df[cell_hits.any(axis=1)]

def dataset_viewer(shared_instruction, shared_schema):
    """Build the "Dataset Explorer" UI and wire up its event handlers.

    Creates the header, the dataset controls, the searchable preview table and
    the "Run in Model Demo" action. The function only instantiates Gradio
    components, so it is presumably meant to be called inside an active
    ``gr.Blocks`` context (confirm against the caller).

    Args:
        shared_instruction: Component that receives the selected row's
            ``english_prompt`` value when "Run in Model Demo" is clicked.
        shared_schema: Component that receives the selected row's
            ``create_statement`` value on the same click.

    Returns:
        A dict with the loaded-data state under ``'df_state'`` and the preview
        table component under ``'df_display'``.
    """
    gr.HTML("""
        <div style="text-align: center; padding: 2rem 1.5rem; background: linear-gradient(135deg, #2A2A2A 0%, #3A3A3A 100%); border-radius: 16px; margin-bottom: 1.5rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
            <h2 style="font-size: 2rem; font-weight: 700; margin-bottom: 0.5rem; color: #FF6B4A;">Dataset Explorer</h2>
            <p style="font-size: 1rem; opacity: 0.9; line-height: 1.6; color: #D0D0D0;">
                Browse, search, and explore TinySQL datasets
            </p>
        </div>
    """)
    
    gr.HTML("""
        <div style="background: linear-gradient(135deg, #2A2A2A 0%, #3A3A3A 100%); border-radius: 12px; padding: 1.5rem; margin: 1rem 0; border-left: 4px solid #FF6B4A;">
            <p style="color: #D0D0D0; margin: 0; line-height: 1.6;">
                <strong style="color: #FF6B4A;">Quick Start:</strong> Select a dataset, click Load Dataset, then use search to filter. Pick any row and send it to the Model Demo tab.
            </p>
        </div>
    """)
    
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Controls")
            
            dataset_dropdown = gr.Dropdown(
                choices=list(DATASETS.keys()),
                value="CS1",
                label="Choose Dataset",
                info="Select complexity level"
            )
            
            # Better formatted model guide card
            gr.HTML("""
                <div style="background: #2A2A2A; border-radius: 12px; padding: 1.5rem; margin: 1.5rem 0; border: 1px solid #3A3A3A;">
                    <h4 style="color: #FF6B4A; font-size: 1rem; margin: 0 0 1.25rem 0; font-weight: 700; border-bottom: 2px solid #3A3A3A; padding-bottom: 0.75rem;">Dataset Complexity Levels</h4>
                    
                    <div style="margin-bottom: 1.5rem;">
                        <div style="color: #4CAF50; font-weight: 600; font-size: 0.95rem; margin-bottom: 0.5rem;">Basic Level</div>
                        <div style="margin-left: 1rem; color: #999; font-size: 0.85rem; line-height: 1.8;">
                            <div><strong style="color: #D0D0D0;">CS1:</strong> Basic SELECT-FROM queries</div>
                            <div><strong style="color: #D0D0D0;">CS2:</strong> Adds ORDER BY clauses</div>
                        </div>
                    </div>
                    
                    <div style="margin-bottom: 1.5rem;">
                        <div style="color: #FF9800; font-weight: 600; font-size: 0.95rem; margin-bottom: 0.5rem;">Intermediate Level</div>
                        <div style="margin-left: 1rem; color: #999; font-size: 0.85rem; line-height: 1.8;">
                            <div><strong style="color: #D0D0D0;">CS3:</strong> Aggregations (COUNT, SUM, AVG)</div>
                            <div><strong style="color: #D0D0D0;">CS4:</strong> Adds WHERE filters</div>
                        </div>
                    </div>
                    
                    <div>
                        <div style="color: #f44336; font-weight: 600; font-size: 0.95rem; margin-bottom: 0.5rem;">Advanced Level</div>
                        <div style="margin-left: 1rem; color: #999; font-size: 0.85rem; line-height: 1.8;">
                            <div><strong style="color: #D0D0D0;">CS5:</strong> Multi-table JOINs</div>
                        </div>
                    </div>
                    
                    <div style="margin-top: 1.5rem; padding-top: 1.25rem; border-top: 1px solid #3A3A3A;">
                        <div style="color: #FF6B4A; font-weight: 600; font-size: 0.9rem; margin-bottom: 0.5rem;">Synonym Variants</div>
                        <div style="color: #999; font-size: 0.85rem; line-height: 1.6;">Natural language variations with semantic mappings</div>
                    </div>
                </div>
            """)
            
            load_btn = gr.Button("Load Dataset", variant="primary", size="lg")
            
            gr.Markdown("### Test Example")
            row_selector = gr.Number(
                label="Row Number",
                value=0,
                minimum=0,
                precision=0,
                info="Pick a row to test"
            )
            
            send_to_model_btn = gr.Button("Run in Model Demo", variant="primary")
        
        with gr.Column(scale=3):
            gr.Markdown("### Dataset Preview")
            
            search_box = gr.Textbox(
                label="Search",
                placeholder="Search across all columns...",
                lines=1
            )
            
            # HuggingFace-style table
            gr.HTML("""
                <style>
                /* True HuggingFace-style table */
                .dataframe-container {
                    border-radius: 8px !important;
                    overflow: hidden !important;
                    border: 1px solid #374151 !important;
                }

                .dataframe table {
                    border-collapse: collapse !important;
                    width: 100% !important;
                    font-size: 0.875rem !important;
                }

                .dataframe thead {
                    background: #1f2937 !important;
                }

                .dataframe thead th {
                    color: #9ca3af !important;
                    font-weight: 600 !important;
                    text-align: left !important;
                    padding: 0.75rem 1rem !important;
                    border-bottom: 1px solid #374151 !important;
                    font-size: 0.75rem !important;
                    text-transform: uppercase !important;
                    letter-spacing: 0.05em !important;
                }

                .dataframe tbody tr {
                    background: #111827 !important;
                    border-bottom: 1px solid #1f2937 !important;
                    transition: background-color 0.15s ease !important;
                }

                .dataframe tbody tr:hover {
                    background: #1f2937 !important;
                }

                .dataframe tbody td {
                    padding: 0.75rem 1rem !important;
                    color: #d1d5db !important;
                    font-size: 0.875rem !important;
                    line-height: 1.5 !important;
                    max-width: 400px !important;
                    overflow: hidden !important;
                    text-overflow: ellipsis !important;
                }

                .dataframe tbody tr:last-child {
                    border-bottom: none !important;
                }
                </style>
            """)
            
            df_display = gr.Dataframe(
                headers=COLUMNS,
                datatype=["str", "str", "str"],
                interactive=False,
                wrap=True,
                label="Results",
                elem_classes="dataframe-container"
            )
            
            stats_display = gr.Markdown("Click **Load Dataset** to begin exploring")
    
    # Full preview as last loaded; searches always filter from this frame.
    df_state = gr.State(value=pd.DataFrame())
    # Rows currently shown in the table (after any search), so that the row
    # number picked by the user refers to what is on screen.
    view_state = gr.State(value=pd.DataFrame())
    
    def load_and_display(dataset_name):
        """Load a preview and return (loaded state, view state, table, stats)."""
        df = load_preview(dataset_name)
        if "Error" in df.columns:
            return df, df, df, "Error loading dataset"
        # Report the columns actually kept; these equal COLUMNS whenever the
        # dataset provides all of them.
        shown_columns = ', '.join(str(col) for col in df.columns)
        stats = f"**Loaded {len(df)} rows** • Columns: {shown_columns}"
        return df, df, df, stats
    
    load_btn.click(
        fn=load_and_display,
        inputs=dataset_dropdown,
        outputs=[df_state, view_state, df_display, stats_display]
    )
    
    def search_and_display(df, query):
        """Filter the loaded preview and return (view state, table, stats)."""
        if df.empty:
            return df, df, "Load a dataset first"
        
        filtered_df = filter_dataframe(df, query)
        stats = f"**Showing {len(filtered_df)} of {len(df)} rows**"
        if query:
            stats += f" • Search: '{query}'"
        return filtered_df, filtered_df, stats
    
    search_box.change(
        fn=search_and_display,
        inputs=[df_state, search_box],
        outputs=[view_state, df_display, stats_display]
    )
    
    def send_to_model(df, row_num):
        """Return (instruction, schema, status) for a row of the shown table.

        ``row_num`` is a 0-based position within the rows currently shown.
        """
        invalid = "", "", "Invalid row or no data loaded"
        # A missing row number or an error frame is not a selectable row.
        if row_num is None or df is None or df.empty or "Error" in df.columns:
            return invalid
        
        row_idx = int(row_num)
        # Reject negatives explicitly: iloc would otherwise count from the end.
        if not 0 <= row_idx < len(df):
            return invalid
        
        row = df.iloc[row_idx]
        instruction = row['english_prompt'] if 'english_prompt' in row else ""
        schema = row['create_statement'] if 'create_statement' in row else ""
        
        return instruction, schema, f"**Row {row_idx} loaded!** Switch to Model Demo tab"
    
    send_to_model_btn.click(
        fn=send_to_model,
        inputs=[view_state, row_selector],
        outputs=[shared_instruction, shared_schema, stats_display]
    )
    
    return {'df_state': df_state, 'df_display': df_display}