broadfield-dev committed on
Commit
8603196
·
verified ·
1 Parent(s): 3f98bbe

Create templates/index.html

Browse files
Files changed (1) hide show
  1. templates/index.html +243 -0
templates/index.html ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>HF Dataset Command Center</title>
6
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
7
+ <style>
8
+ .json-badge { background-color: #ffc107; color: #000; font-size: 0.8em; padding: 2px 5px; border-radius: 4px; }
9
+ .box { border: 1px solid #ddd; padding: 20px; border-radius: 8px; margin-bottom: 20px; }
10
+ pre { background: #f4f4f4; padding: 10px; max-height: 200px; overflow: auto; }
11
+ </style>
12
+ </head>
13
+ <body class="bg-light">
14
+
15
+ <div class="container mt-5">
16
+ <h1 class="mb-4">🤗 Dataset Command Center</h1>
17
+
18
+ <!-- Step 1: Authentication & Load -->
19
+ <div class="box bg-white">
20
+ <h4>1. Source Configuration</h4>
21
+ <div class="row g-3">
22
+ <div class="col-md-4">
23
+ <label>HF Write Token</label>
24
+ <input type="password" id="token" class="form-control" placeholder="hf_...">
25
+ </div>
26
+ <div class="col-md-4">
27
+ <label>Source Dataset ID</label>
28
+ <input type="text" id="dataset_id" class="form-control" placeholder="e.g. imalexissa/scikit_learn_metadata">
29
+ </div>
30
+ <div class="col-md-2">
31
+ <label>Split</label>
32
+ <input type="text" id="split" class="form-control" value="train">
33
+ </div>
34
+ <div class="col-md-2 d-flex align-items-end">
35
+ <button class="btn btn-primary w-100" onclick="inspectDataset()">Inspect</button>
36
+ </div>
37
+ </div>
38
+ </div>
39
+
40
+ <!-- Step 2: Column Analysis -->
41
+ <div id="inspector-panel" class="box bg-white" style="display:none;">
42
+ <h4>2. Column Inspector</h4>
43
+ <p class="text-muted">Detected columns. Check "Expand JSON" to extract keys.</p>
44
+ <table class="table table-sm" id="col-table">
45
+ <thead>
46
+ <tr>
47
+ <th>Column</th>
48
+ <th>Type</th>
49
+ <th>Actions</th>
50
+ </tr>
51
+ </thead>
52
+ <tbody id="col-list"></tbody>
53
+ </table>
54
+
55
+ <div class="mt-3">
56
+ <h5>Recipe Builder</h5>
57
+ <div class="row g-2">
58
+ <div class="col-md-4">
59
+ <label>JSON Expand (Col: keys,comma,sep)</label>
60
+ <input type="text" id="json-rule" class="form-control" placeholder="meta: url, id">
61
+ </div>
62
+ <div class="col-md-4">
63
+ <label>Rename (old=new)</label>
64
+ <input type="text" id="rename-rule" class="form-control" placeholder="text_body=text">
65
+ </div>
66
+ <div class="col-md-4">
67
+ <label>Drop Column</label>
68
+ <input type="text" id="drop-rule" class="form-control" placeholder="col_name">
69
+ </div>
70
+ </div>
71
+ <div class="mt-2">
72
+ <label>Filter Expression (Python syntax)</label>
73
+ <input type="text" id="filter-rule" class="form-control" placeholder="len(text) > 100">
74
+ </div>
75
+ </div>
76
+ </div>
77
+
78
+ <!-- Step 3: Preview -->
79
+ <div class="box bg-white">
80
+ <div class="d-flex justify-content-between">
81
+ <h4>3. Preview & Execute</h4>
82
+ <button class="btn btn-secondary" onclick="runPreview()">Run Dry Preview</button>
83
+ </div>
84
+ <div id="preview-area" class="mt-3"></div>
85
+
86
+ <hr>
87
+ <div class="row g-3 align-items-end">
88
+ <div class="col-md-6">
89
+ <label>Target Dataset ID (to push)</label>
90
+ <input type="text" id="target_id" class="form-control" placeholder="username/new-dataset-name">
91
+ </div>
92
+ <div class="col-md-3">
93
+ <label>Max Rows (Empty for all)</label>
94
+ <input type="number" id="max_rows" class="form-control">
95
+ </div>
96
+ <div class="col-md-3">
97
+ <button class="btn btn-success w-100" onclick="executeJob()">🚀 Push to Hub</button>
98
+ </div>
99
+ </div>
100
+ <div id="job-status" class="mt-3 text-info fw-bold"></div>
101
+ </div>
102
+ </div>
103
+
104
+ <script>
105
// NOTE(review): initialized to an empty array and never read or reassigned
// anywhere else in this script; presumably meant to hold the inspected
// column names — confirm whether it is still needed.
let currentCols = [];
106
+
107
/**
 * Build one <tr> for the column inspector table.
 *
 * All server-provided text (column name, type) is assigned through
 * `textContent`, and the Drop action is wired with a real event listener, so
 * column names containing quotes or markup can neither break the row nor
 * inject HTML/JS into the page.
 *
 * @param {string} col - Column name as reported by the server.
 * @param {{type: *, is_json: *}} info - Analysis entry for the column.
 * @returns {HTMLTableRowElement} The populated table row.
 */
function buildColumnRow(col, info) {
    const row = document.createElement('tr');

    const nameCell = document.createElement('td');
    nameCell.textContent = col;
    row.appendChild(nameCell);

    const typeCell = document.createElement('td');
    typeCell.textContent = `${info.type} `;
    if (info.is_json) {
        const badge = document.createElement('span');
        badge.className = 'json-badge';
        badge.textContent = 'JSON Detected';
        typeCell.appendChild(badge);
    }
    row.appendChild(typeCell);

    const actionCell = document.createElement('td');
    const dropBtn = document.createElement('button');
    dropBtn.className = 'btn btn-sm btn-outline-danger';
    dropBtn.textContent = 'Drop';
    // Closure keeps the exact column name; no string-built onclick attribute.
    dropBtn.addEventListener('click', () => addDrop(col));
    actionCell.appendChild(dropBtn);
    row.appendChild(actionCell);

    return row;
}

/**
 * POST the token, dataset id and split to `/inspect` and render the returned
 * column analysis into the inspector table.
 *
 * On a `success` response the inspector panel is shown and one row per entry
 * of `data.analysis` is rendered. On any other response, or when the request
 * or JSON parsing fails, an alert is shown. The Inspect button is always
 * re-enabled afterwards, including on failure.
 *
 * @returns {Promise<void>}
 */
async function inspectDataset() {
    const btn = document.querySelector('button[onclick="inspectDataset()"]');
    btn.disabled = true;
    btn.innerText = "Loading...";

    try {
        const payload = {
            token: document.getElementById('token').value,
            dataset_id: document.getElementById('dataset_id').value,
            split: document.getElementById('split').value
        };

        const res = await fetch('/inspect', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(payload)
        });
        const data = await res.json();

        if (data.status !== 'success') {
            alert('Error: ' + data.message);
            return;
        }

        document.getElementById('inspector-panel').style.display = 'block';
        const tbody = document.getElementById('col-list');
        tbody.innerHTML = '';

        // Append real nodes instead of `innerHTML +=`, which re-parsed the
        // whole table body once per column.
        for (const [col, info] of Object.entries(data.analysis)) {
            tbody.appendChild(buildColumnRow(col, info));
        }
    } catch (err) {
        // Network failures and non-JSON responses end up here.
        alert('Error: ' + err.message);
    } finally {
        // Never leave the button stuck in the disabled "Loading..." state.
        btn.disabled = false;
        btn.innerText = "Inspect";
    }
}
143
+
144
/**
 * Read the Recipe Builder inputs and assemble the recipe object sent to the
 * server.
 *
 * Input formats:
 *   - `#json-rule`:   "column: key1, key2" — split on the FIRST ':' only, so
 *                     keys that themselves contain ':' are preserved. Blank
 *                     keys are discarded; the rule is skipped when the column
 *                     or the key list is empty.
 *   - `#rename-rule`: "old=new" — split on the FIRST '=' only; skipped when
 *                     either side is empty.
 *   - `#drop-rule`:   a single column name.
 *   - `#filter-rule`: a single filter expression.
 *
 * All values are trimmed before use; whitespace-only inputs count as empty.
 *
 * @returns {{json_expansions: Array<{col: string, keys: string[]}>,
 *            renames: Object<string, string>,
 *            drops: string[],
 *            filters: string[]}}
 */
function getRecipe() {
    const readField = (id) => document.getElementById(id).value.trim();

    const jsonRule = readField('json-rule');
    const renameRule = readField('rename-rule');
    const dropRule = readField('drop-rule');
    const filterRule = readField('filter-rule');

    const recipe = { json_expansions: [], renames: {}, drops: [], filters: [] };

    // Parse JSON rule: "meta: url, id"
    const colonAt = jsonRule.indexOf(':');
    if (colonAt !== -1) {
        const col = jsonRule.slice(0, colonAt).trim();
        const keys = jsonRule
            .slice(colonAt + 1)
            .split(',')
            .map((key) => key.trim())
            .filter((key) => key !== '');
        if (col !== '' && keys.length > 0) {
            recipe.json_expansions.push({ col, keys });
        }
    }

    // Parse Rename: "a=b"
    const equalsAt = renameRule.indexOf('=');
    if (equalsAt !== -1) {
        const oldName = renameRule.slice(0, equalsAt).trim();
        const newName = renameRule.slice(equalsAt + 1).trim();
        if (oldName !== '' && newName !== '') {
            recipe.renames[oldName] = newName;
        }
    }

    // Parse Drops
    if (dropRule !== '') {
        recipe.drops.push(dropRule);
    }

    // Parse Filters
    if (filterRule !== '') {
        recipe.filters.push(filterRule);
    }

    return recipe;
}
175
+
176
/**
 * POST the current source settings and recipe to `/preview` and show the
 * result in `#preview-area`.
 *
 * On a `success` response the returned rows are shown as pretty-printed JSON
 * inside a <pre>; otherwise the server message is shown in a `text-danger`
 * <div>. Request or JSON-parsing failures are shown the same way instead of
 * surfacing as unhandled promise rejections.
 *
 * Dataset rows and server messages are untrusted text, so they are written
 * via `textContent` and are never parsed as HTML.
 *
 * @returns {Promise<void>}
 */
async function runPreview() {
    const area = document.getElementById('preview-area');

    // Replace the preview area's content with a single text-only element.
    const render = (tagName, text, className) => {
        const node = document.createElement(tagName);
        if (className) {
            node.className = className;
        }
        node.textContent = text;
        area.textContent = '';
        area.appendChild(node);
    };

    try {
        const payload = {
            token: document.getElementById('token').value,
            dataset_id: document.getElementById('dataset_id').value,
            split: document.getElementById('split').value,
            recipe: getRecipe()
        };

        const res = await fetch('/preview', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(payload)
        });
        const data = await res.json();

        if (data.status === 'success') {
            render('pre', JSON.stringify(data.rows, null, 2));
        } else {
            render('div', String(data.message), 'text-danger');
        }
    } catch (err) {
        render('div', `Preview failed: ${err.message}`, 'text-danger');
    }
}
197
+
198
/**
 * POST the source settings, target dataset id, recipe and row limit to
 * `/execute` and, when the server reports the job as `started`, begin polling
 * its status.
 *
 * Any other response, and any request or JSON-parsing failure, is reported in
 * `#job-status` so the user is not left without feedback.
 *
 * `max_rows` is sent exactly as typed (a string, empty when the field is
 * blank), which is what the page has always sent.
 *
 * @returns {Promise<void>}
 */
async function executeJob() {
    const statusDiv = document.getElementById('job-status');

    try {
        const payload = {
            token: document.getElementById('token').value,
            dataset_id: document.getElementById('dataset_id').value,
            target_id: document.getElementById('target_id').value,
            split: document.getElementById('split').value,
            recipe: getRecipe(),
            max_rows: document.getElementById('max_rows').value
        };

        const res = await fetch('/execute', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(payload)
        });
        const data = await res.json();

        if (data.status === 'started') {
            pollStatus(data.job_id);
        } else {
            // NOTE(review): the error field name for /execute is not visible
            // here; `message` matches /inspect and /preview, `error` matches
            // /status — confirm against the server.
            const reason = data.message || data.error || 'job was not started';
            statusDiv.innerText = `Error: ${reason}`;
        }
    } catch (err) {
        statusDiv.innerText = `Error: ${err.message}`;
    }
}
218
+
219
/**
 * Poll `/status/<jobId>` every 2 seconds and reflect the job state in
 * `#job-status` until the job leaves the `running` state.
 *
 * - `running`: keep polling.
 * - `completed`: stop polling and show the processed row count.
 * - anything else: stop polling and show `data.error`.
 *
 * The timer is cleared BEFORE the response payload is dereferenced, so a
 * malformed `completed` response can no longer leave the interval running
 * forever. Request/JSON failures are tolerated for a few consecutive ticks
 * (transient network hiccups) and then reported. A tick is skipped while the
 * previous request is still in flight so slow responses cannot pile up.
 *
 * @param {string} jobId - Identifier returned by `/execute`.
 */
function pollStatus(jobId) {
    const MAX_CONSECUTIVE_FAILURES = 5;
    const statusDiv = document.getElementById('job-status');
    let consecutiveFailures = 0;
    let inFlight = false;

    const interval = setInterval(async () => {
        if (inFlight) {
            return;
        }
        inFlight = true;

        try {
            const res = await fetch(`/status/${encodeURIComponent(jobId)}`);
            const data = await res.json();
            consecutiveFailures = 0;

            if (data.status === 'running') {
                statusDiv.innerText = "Processing... (This runs in background)";
                return;
            }

            // Terminal state: stop first, then render.
            clearInterval(interval);
            if (data.status === 'completed') {
                const hasCount = data.result != null && data.result.rows_processed !== undefined;
                const rowCount = hasCount ? data.result.rows_processed : 'unknown';
                statusDiv.innerText = `Success! Pushed ${rowCount} rows.`;
            } else {
                statusDiv.innerText = `Error: ${data.error}`;
            }
        } catch (err) {
            consecutiveFailures += 1;
            if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                clearInterval(interval);
                statusDiv.innerText = `Error: ${err.message}`;
            }
        } finally {
            inFlight = false;
        }
    }, 2000);
}
236
+
237
/**
 * Put the given column name into the "Drop Column" recipe input
 * (`#drop-rule`), replacing whatever value it held before.
 *
 * @param {string} col - Column name to place in the drop field.
 */
function addDrop(col) {
    const dropInput = document.getElementById('drop-rule');
    dropInput.value = col;
}
240
+ </script>
241
+
242
+ </body>
243
+ </html>