broadfield-dev committed on
Commit
8350ba2
·
verified ·
1 Parent(s): 5aaeff4

Update templates/index.html

Browse files
Files changed (1) hide show
  1. templates/index.html +164 -94
templates/index.html CHANGED
@@ -2,145 +2,220 @@
2
  <html lang="en">
3
  <head>
4
  <meta charset="UTF-8">
5
- <title>HF Dataset Command Center</title>
6
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
7
  <style>
8
- .json-badge { background-color: #ffc107; color: #000; font-size: 0.8em; padding: 2px 5px; border-radius: 4px; }
9
- .box { border: 1px solid #ddd; padding: 20px; border-radius: 8px; margin-bottom: 20px; }
10
- pre { background: #f4f4f4; padding: 10px; max-height: 200px; overflow: auto; }
 
 
 
11
  </style>
12
  </head>
13
  <body class="bg-light">
14
 
15
- <div class="container mt-5">
16
- <h1 class="mb-4">🤗 Dataset Command Center</h1>
 
 
17
 
18
- <!-- Step 1: Authentication & Load -->
19
- <div class="box bg-white">
20
- <h4>1. Source Configuration</h4>
21
  <div class="row g-3">
22
- <div class="col-md-4">
23
- <label>HF Write Token</label>
24
  <input type="password" id="token" class="form-control" placeholder="hf_...">
25
  </div>
26
- <div class="col-md-4">
27
- <label>Source Dataset ID</label>
28
- <input type="text" id="dataset_id" class="form-control" placeholder="e.g. imalexissa/scikit_learn_metadata">
 
 
 
29
  </div>
30
  <div class="col-md-2">
31
- <label>Split</label>
32
- <input type="text" id="split" class="form-control" value="train">
 
 
33
  </div>
34
- <div class="col-md-2 d-flex align-items-end">
35
- <button class="btn btn-primary w-100" onclick="inspectDataset()">Inspect</button>
 
 
 
36
  </div>
37
  </div>
 
 
 
38
  </div>
39
 
40
- <!-- Step 2: Column Analysis -->
41
- <div id="inspector-panel" class="box bg-white" style="display:none;">
42
- <h4>2. Column Inspector</h4>
43
- <p class="text-muted">Detected columns. Check "Expand JSON" to extract keys.</p>
44
- <table class="table table-sm" id="col-table">
45
- <thead>
46
- <tr>
47
- <th>Column</th>
48
- <th>Type</th>
49
- <th>Actions</th>
50
- </tr>
51
- </thead>
52
- <tbody id="col-list"></tbody>
53
- </table>
54
 
55
- <div class="mt-3">
56
- <h5>Recipe Builder</h5>
57
- <div class="row g-2">
58
- <div class="col-md-4">
59
- <label>JSON Expand (Col: keys,comma,sep)</label>
60
- <input type="text" id="json-rule" class="form-control" placeholder="meta: url, id">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  </div>
62
- <div class="col-md-4">
63
- <label>Rename (old=new)</label>
64
- <input type="text" id="rename-rule" class="form-control" placeholder="text_body=text">
 
65
  </div>
66
- <div class="col-md-4">
67
- <label>Drop Column</label>
68
- <input type="text" id="drop-rule" class="form-control" placeholder="col_name">
 
 
69
  </div>
70
- </div>
71
- <div class="mt-2">
72
- <label>Filter Expression (Python syntax)</label>
73
- <input type="text" id="filter-rule" class="form-control" placeholder="len(text) > 100">
74
  </div>
75
  </div>
76
  </div>
77
 
78
- <!-- Step 3: Preview -->
79
- <div class="box bg-white">
80
- <div class="d-flex justify-content-between">
81
- <h4>3. Preview & Execute</h4>
82
- <button class="btn btn-secondary" onclick="runPreview()">Run Dry Preview</button>
83
- </div>
84
- <div id="preview-area" class="mt-3"></div>
85
 
86
  <hr>
87
  <div class="row g-3 align-items-end">
88
- <div class="col-md-6">
89
- <label>Target Dataset ID (to push)</label>
90
  <input type="text" id="target_id" class="form-control" placeholder="username/new-dataset-name">
91
  </div>
92
  <div class="col-md-3">
93
- <label>Max Rows (Empty for all)</label>
94
- <input type="number" id="max_rows" class="form-control">
95
  </div>
96
- <div class="col-md-3">
97
- <button class="btn btn-success w-100" onclick="executeJob()">🚀 Push to Hub</button>
98
  </div>
99
  </div>
100
- <div id="job-status" class="mt-3 text-info fw-bold"></div>
101
  </div>
102
  </div>
103
 
104
  <script>
105
- let currentCols = [];
 
 
 
 
 
 
 
 
106
 
107
- async function inspectDataset() {
108
- const btn = document.querySelector('button[onclick="inspectDataset()"]');
109
- btn.disabled = true; btn.innerText = "Loading...";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  const payload = {
112
  token: document.getElementById('token').value,
113
  dataset_id: document.getElementById('dataset_id').value,
114
- split: document.getElementById('split').value
 
115
  };
116
 
117
- const res = await fetch('/inspect', {
118
  method: 'POST', headers: {'Content-Type': 'application/json'},
119
  body: JSON.stringify(payload)
120
  });
121
  const data = await res.json();
122
 
 
 
123
  if(data.status === 'success') {
124
  document.getElementById('inspector-panel').style.display = 'block';
125
  const tbody = document.getElementById('col-list');
126
  tbody.innerHTML = '';
127
 
128
  for (const [col, info] of Object.entries(data.analysis)) {
129
- let html = `<tr>
130
- <td>${col}</td>
131
- <td>${info.type} ${info.is_json ? '<span class="json-badge">JSON Detected</span>' : ''}</td>
132
- <td>
133
- <button class="btn btn-sm btn-outline-danger" onclick="addDrop('${col}')">Drop</button>
134
- </td>
135
  </tr>`;
136
- tbody.innerHTML += html;
137
  }
138
  } else {
139
- alert('Error: ' + data.message);
140
  }
141
- btn.disabled = false; btn.innerText = "Inspect";
142
  }
143
 
 
144
  function getRecipe() {
145
  const jsonRule = document.getElementById('json-rule').value;
146
  const renameRule = document.getElementById('rename-rule').value;
@@ -149,7 +224,6 @@
149
 
150
  let recipe = { json_expansions: [], renames: {}, drops: [], filters: [] };
151
 
152
- // Parse JSON rule: "meta: url, id"
153
  if(jsonRule.includes(':')) {
154
  let [col, keys] = jsonRule.split(':');
155
  recipe.json_expansions.push({
@@ -157,17 +231,11 @@
157
  keys: keys.split(',').map(k => k.trim())
158
  });
159
  }
160
-
161
- // Parse Rename: "a=b"
162
  if(renameRule.includes('=')) {
163
  let [oldC, newC] = renameRule.split('=');
164
  recipe.renames[oldC.trim()] = newC.trim();
165
  }
166
-
167
- // Parse Drops
168
  if(dropRule) recipe.drops.push(dropRule.trim());
169
-
170
- // Parse Filters
171
  if(filterRule) recipe.filters.push(filterRule.trim());
172
 
173
  return recipe;
@@ -177,7 +245,8 @@
177
  const payload = {
178
  token: document.getElementById('token').value,
179
  dataset_id: document.getElementById('dataset_id').value,
180
- split: document.getElementById('split').value,
 
181
  recipe: getRecipe()
182
  };
183
 
@@ -187,6 +256,7 @@
187
  });
188
  const data = await res.json();
189
 
 
190
  const area = document.getElementById('preview-area');
191
  if (data.status === 'success') {
192
  area.innerHTML = `<pre>${JSON.stringify(data.rows, null, 2)}</pre>`;
@@ -199,8 +269,9 @@
199
  const payload = {
200
  token: document.getElementById('token').value,
201
  dataset_id: document.getElementById('dataset_id').value,
 
 
202
  target_id: document.getElementById('target_id').value,
203
- split: document.getElementById('split').value,
204
  recipe: getRecipe(),
205
  max_rows: document.getElementById('max_rows').value
206
  };
@@ -212,6 +283,7 @@
212
  const data = await res.json();
213
 
214
  if(data.status === 'started') {
 
215
  pollStatus(data.job_id);
216
  }
217
  }
@@ -223,21 +295,19 @@
223
  const data = await res.json();
224
 
225
  if(data.status === 'running') {
226
- statusDiv.innerText = "Processing... (This runs in background)";
 
227
  } else if (data.status === 'completed') {
228
- statusDiv.innerText = `Success! Pushed ${data.result.rows_processed} rows.`;
 
229
  clearInterval(interval);
230
- } else {
231
  statusDiv.innerText = `Error: ${data.error}`;
 
232
  clearInterval(interval);
233
  }
234
  }, 2000);
235
  }
236
-
237
- function addDrop(col) {
238
- document.getElementById('drop-rule').value = col;
239
- }
240
  </script>
241
-
242
  </body>
243
  </html>
 
2
  <html lang="en">
3
  <head>
4
  <meta charset="UTF-8">
5
+ <title>HF Dataset ETL Command Center</title>
6
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
7
  <style>
8
+ .json-badge { background-color: #ffc107; color: #000; font-size: 0.75em; padding: 2px 6px; border-radius: 4px; font-weight: bold; }
9
+ .box { border: 1px solid #e0e0e0; padding: 25px; border-radius: 12px; margin-bottom: 25px; background: white; box-shadow: 0 2px 5px rgba(0,0,0,0.05); }
10
+ .step-num { font-weight: bold; color: #6c757d; margin-bottom: 10px; display: block; }
11
+ pre { background: #f8f9fa; padding: 15px; max-height: 300px; overflow: auto; border-radius: 6px; border: 1px solid #eee; }
12
+ .form-label { font-weight: 500; font-size: 0.9em; }
13
+ .table-hover tbody tr:hover { background-color: #f8f9fa; }
14
  </style>
15
  </head>
16
  <body class="bg-light">
17
 
18
+ <div class="container mt-4 mb-5">
19
+ <div class="d-flex align-items-center mb-4">
20
+ <h2 class="me-3">🛠️ Hugging Face Dataset Command Center</h2>
21
+ </div>
22
 
23
<!-- Step 1: Configuration
     Collects the HF token and dataset id, then lets the user pick a config and split.
     The onclick/onchange attributes are kept verbatim: the page script locates the
     "Analyze Dataset" button through the selector button[onclick="analyzeMetadata()"]. -->
<div class="box">
    <span class="step-num">STEP 1: SOURCE SELECTION</span>
    <div class="row g-3">
        <div class="col-md-3">
            <!-- Each label is tied to its control with for/id so that clicking the label
                 focuses the field and assistive technology announces the field name. -->
            <label class="form-label" for="token">HF Write Token</label>
            <input type="password" id="token" class="form-control" placeholder="hf_..." autocomplete="off">
        </div>
        <div class="col-md-5">
            <label class="form-label" for="dataset_id">Dataset Repository ID</label>
            <div class="input-group">
                <input type="text" id="dataset_id" class="form-control" placeholder="e.g. the_stack">
                <button type="button" class="btn btn-primary" onclick="analyzeMetadata()">Analyze Dataset</button>
            </div>
        </div>
        <div class="col-md-2">
            <label class="form-label" for="config_select">Config (Subset)</label>
            <select id="config_select" class="form-select" onchange="updateSplits()" disabled>
                <option>Select...</option>
            </select>
        </div>
        <div class="col-md-2">
            <label class="form-label" for="split_select">Split</label>
            <select id="split_select" class="form-select" disabled>
                <option>Select...</option>
            </select>
        </div>
    </div>
    <div class="mt-3 text-end">
        <button type="button" class="btn btn-outline-dark" id="load_sample_btn" onclick="inspectRows()" disabled>Load Sample Data ⬇</button>
    </div>
</div>
55
 
56
<!-- Step 2: Inspector
     Hidden until the page script sets style.display = 'block' on #inspector-panel,
     so the inline display:none is kept rather than replaced with the hidden attribute.
     Left column: detected source columns (rows are injected into #col-list).
     Right column: the recipe inputs read by getRecipe(). -->
<div id="inspector-panel" class="box" style="display:none;">
    <span class="step-num">STEP 2: SCHEMA &amp; RECIPE</span>

    <div class="row">
        <div class="col-md-6">
            <h5>Source Columns</h5>
            <p class="text-muted small">We detected the following columns. Use the tools on the right to modify them.</p>
            <table class="table table-sm table-hover" id="col-table">
                <thead>
                    <tr>
                        <th scope="col">Name</th>
                        <th scope="col">Type</th>
                    </tr>
                </thead>
                <tbody id="col-list"></tbody>
            </table>
        </div>

        <div class="col-md-6 border-start">
            <h5 class="mb-3">Transformation Recipe</h5>

            <div class="mb-3">
                <label class="form-label" for="json-rule">1. Extract from JSON</label>
                <input type="text" id="json-rule" class="form-control mb-1" placeholder="col_name: key1, nested.key2" aria-describedby="json-rule-hint">
                <small class="text-muted" id="json-rule-hint">Format: <code>source_col: new_key1, meta.id</code></small>
            </div>

            <div class="mb-3">
                <label class="form-label" for="rename-rule">2. Rename Columns</label>
                <input type="text" id="rename-rule" class="form-control" placeholder="old_name=new_name">
            </div>

            <div class="mb-3">
                <label class="form-label" for="drop-rule">3. Drop Columns</label>
                <input type="text" id="drop-rule" class="form-control" placeholder="col_to_delete">
            </div>

            <div class="mb-3">
                <label class="form-label" for="filter-rule">4. Filter Rows (Python Condition)</label>
                <input type="text" id="filter-rule" class="form-control" placeholder="len(text) > 500" aria-describedby="filter-rule-hint">
                <small class="text-muted" id="filter-rule-hint">Condition applies to <b>transformed</b> data.</small>
            </div>

            <button type="button" class="btn btn-success w-100 mt-2" onclick="runPreview()">Preview Transformation ▶</button>
        </div>
    </div>
</div>
99
 
100
<!-- Step 3: Output
     Hidden until the page script sets style.display = 'block' on #preview-panel.
     #preview-area receives the rendered preview; #job-status receives job progress text. -->
<div id="preview-panel" class="box" style="display:none;">
    <span class="step-num">STEP 3: PREVIEW &amp; PUSH</span>

    <h6>Transformed Preview (First 5 Rows)</h6>
    <div id="preview-area"></div>

    <hr>
    <div class="row g-3 align-items-end">
        <div class="col-md-5">
            <label class="form-label" for="target_id">Target Repo ID</label>
            <input type="text" id="target_id" class="form-control" placeholder="username/new-dataset-name">
        </div>
        <div class="col-md-3">
            <label class="form-label" for="max_rows">Max Rows (Optional)</label>
            <input type="number" id="max_rows" class="form-control" placeholder="All" min="1">
        </div>
        <div class="col-md-4">
            <button type="button" class="btn btn-danger w-100" onclick="executeJob()">🚀 Launch ETL Job</button>
        </div>
    </div>
    <!-- Live region: the status text is replaced asynchronously while a job is polled,
         so it is announced politely to assistive technology. The script overwrites
         className on this element, which leaves role/aria-live untouched. -->
    <div id="job-status" class="mt-3 p-3 rounded bg-light fw-bold text-center" style="display:none" role="status" aria-live="polite"></div>
</div>
123
  </div>
124
 
125
  <script>
126
+ // --- Step 1 Functions ---
127
/**
 * Ask the backend for the dataset's configs and splits and fill the Step 1 dropdowns.
 *
 * Reads the token and dataset id from the form, POSTs them as JSON to
 * /analyze_metadata, and on a `status === 'success'` reply:
 *   - replaces the options of #config_select with `data.configs` and enables it,
 *   - hands `data.splits` to populateSplits(),
 *   - enables the "Load Sample Data" button.
 * Any other status is reported with alert(); thrown errors (network failure,
 * non-JSON body, malformed payload) are alerted as well.
 *
 * Option elements are built through the DOM instead of string-concatenated
 * innerHTML, so a config name containing quotes or markup is shown literally
 * rather than being parsed as HTML.
 */
async function analyzeMetadata() {
    const btn = document.querySelector('button[onclick="analyzeMetadata()"]');
    btn.disabled = true;
    btn.innerText = "Fetching...";

    const payload = {
        token: document.getElementById('token').value,
        dataset_id: document.getElementById('dataset_id').value
    };

    try {
        const res = await fetch('/analyze_metadata', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(payload)
        });
        const data = await res.json();

        if (data.status === 'success') {
            // Populate configs.
            const configSel = document.getElementById('config_select');
            configSel.innerHTML = '';
            data.configs.forEach(c => {
                const opt = document.createElement('option');
                opt.value = c;
                opt.textContent = c;
                configSel.appendChild(opt);
            });
            configSel.disabled = false;

            // Populate splits with the list returned alongside the configs.
            populateSplits(data.splits);
            document.getElementById('load_sample_btn').disabled = false;
        } else {
            alert('Error: ' + data.message);
        }
    } catch (e) {
        alert(e);
    } finally {
        // Always restore the button, whichever path was taken above.
        btn.disabled = false;
        btn.innerText = "Analyze Dataset";
    }
}
162
+
163
/**
 * Replace the options of #split_select with the given split names and enable it.
 *
 * @param {string[]} splits - Split names to offer. A missing or non-array value
 *     is treated as an empty list instead of throwing.
 *
 * Options are created as DOM nodes with `value`/`textContent` rather than via
 * innerHTML string concatenation, so a split name containing quotes or markup
 * cannot break out of the attribute or inject HTML.
 */
function populateSplits(splits) {
    const splitSel = document.getElementById('split_select');
    splitSel.innerHTML = '';
    const names = Array.isArray(splits) ? splits : [];
    names.forEach(s => {
        const opt = document.createElement('option');
        opt.value = s;
        opt.textContent = s;
        splitSel.appendChild(opt);
    });
    splitSel.disabled = false;
}
171
+
172
/**
 * Refresh the split dropdown after the user picks a different config.
 *
 * POSTs the dataset id, the selected config and the token as JSON to /get_splits
 * and, on a `status === 'success'` reply, passes `data.splits` to populateSplits().
 * A non-success reply or a thrown error (network failure, non-JSON body) is
 * reported with alert() instead of being dropped silently.
 *
 * If the user changes the config again while a request is in flight, the late
 * reply for the earlier config is discarded so it cannot overwrite the splits
 * of the config that is currently selected.
 */
async function updateSplits() {
    const configSel = document.getElementById('config_select');
    const conf = configSel.value;
    const ds = document.getElementById('dataset_id').value;

    try {
        const res = await fetch('/get_splits', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({
                dataset_id: ds,
                config: conf,
                token: document.getElementById('token').value
            })
        });
        const data = await res.json();

        // Stale reply: the selection moved on while this request was pending.
        if (configSel.value !== conf) return;

        if (data.status === 'success') {
            populateSplits(data.splits);
        } else {
            alert('Error: ' + data.message);
        }
    } catch (e) {
        alert(e);
    }
}
182
+
183
+ // --- Step 2 Functions ---
184
/**
 * Load a sample of the selected dataset/config/split and list its columns.
 *
 * POSTs token, dataset id, config and split as JSON to /inspect_rows. On a
 * `status === 'success'` reply it reveals #inspector-panel and renders one table
 * row per entry of `data.analysis` (column name, `info.type`, and a "JSON STR"
 * badge when `info.is_json_string` is truthy). Any other status alerts
 * `data.message`; thrown errors (network failure, non-JSON body) are alerted too.
 *
 * Rows are built as DOM nodes with textContent, so column names and type strings
 * coming from the dataset are displayed literally and never parsed as HTML.
 * The button label is restored in `finally`, so a failed request no longer
 * leaves it stuck on "Loading...".
 */
async function inspectRows() {
    const loadBtn = document.getElementById('load_sample_btn');
    loadBtn.innerText = "Loading...";

    const payload = {
        token: document.getElementById('token').value,
        dataset_id: document.getElementById('dataset_id').value,
        config: document.getElementById('config_select').value,
        split: document.getElementById('split_select').value
    };

    try {
        const res = await fetch('/inspect_rows', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(payload)
        });
        const data = await res.json();

        if (data.status === 'success') {
            document.getElementById('inspector-panel').style.display = 'block';
            const tbody = document.getElementById('col-list');
            tbody.innerHTML = '';

            for (const [col, info] of Object.entries(data.analysis)) {
                const row = document.createElement('tr');

                const nameCell = document.createElement('td');
                const nameBold = document.createElement('b');
                nameBold.textContent = col;
                nameCell.appendChild(nameBold);

                const typeCell = document.createElement('td');
                // Trailing space keeps the gap between the type and the badge.
                typeCell.textContent = `${info.type} `;
                if (info.is_json_string) {
                    const badge = document.createElement('span');
                    badge.className = 'json-badge';
                    badge.textContent = 'JSON STR';
                    typeCell.appendChild(badge);
                }

                row.appendChild(nameCell);
                row.appendChild(typeCell);
                tbody.appendChild(row);
            }
        } else {
            alert(data.message);
        }
    } catch (e) {
        alert(e);
    } finally {
        loadBtn.innerText = "Load Sample Data ⬇";
    }
}
217
 
218
+ // --- Step 3 Functions ---
219
  function getRecipe() {
220
  const jsonRule = document.getElementById('json-rule').value;
221
  const renameRule = document.getElementById('rename-rule').value;
 
224
 
225
  let recipe = { json_expansions: [], renames: {}, drops: [], filters: [] };
226
 
 
227
  if(jsonRule.includes(':')) {
228
  let [col, keys] = jsonRule.split(':');
229
  recipe.json_expansions.push({
 
231
  keys: keys.split(',').map(k => k.trim())
232
  });
233
  }
 
 
234
  if(renameRule.includes('=')) {
235
  let [oldC, newC] = renameRule.split('=');
236
  recipe.renames[oldC.trim()] = newC.trim();
237
  }
 
 
238
  if(dropRule) recipe.drops.push(dropRule.trim());
 
 
239
  if(filterRule) recipe.filters.push(filterRule.trim());
240
 
241
  return recipe;
 
245
  const payload = {
246
  token: document.getElementById('token').value,
247
  dataset_id: document.getElementById('dataset_id').value,
248
+ config: document.getElementById('config_select').value,
249
+ split: document.getElementById('split_select').value,
250
  recipe: getRecipe()
251
  };
252
 
 
256
  });
257
  const data = await res.json();
258
 
259
+ document.getElementById('preview-panel').style.display = 'block';
260
  const area = document.getElementById('preview-area');
261
  if (data.status === 'success') {
262
  area.innerHTML = `<pre>${JSON.stringify(data.rows, null, 2)}</pre>`;
 
269
  const payload = {
270
  token: document.getElementById('token').value,
271
  dataset_id: document.getElementById('dataset_id').value,
272
+ config: document.getElementById('config_select').value,
273
+ split: document.getElementById('split_select').value,
274
  target_id: document.getElementById('target_id').value,
 
275
  recipe: getRecipe(),
276
  max_rows: document.getElementById('max_rows').value
277
  };
 
283
  const data = await res.json();
284
 
285
  if(data.status === 'started') {
286
+ document.getElementById('job-status').style.display = 'block';
287
  pollStatus(data.job_id);
288
  }
289
  }
 
295
  const data = await res.json();
296
 
297
  if(data.status === 'running') {
298
+ statusDiv.innerText = "Processing... (Checking every 2s)";
299
+ statusDiv.className = "mt-3 p-3 rounded bg-info text-white fw-bold text-center";
300
  } else if (data.status === 'completed') {
301
+ statusDiv.innerText = `Success! Pushed ${data.result.rows_processed} rows to Hub.`;
302
+ statusDiv.className = "mt-3 p-3 rounded bg-success text-white fw-bold text-center";
303
  clearInterval(interval);
304
+ } else if (data.status === 'failed') {
305
  statusDiv.innerText = `Error: ${data.error}`;
306
+ statusDiv.className = "mt-3 p-3 rounded bg-danger text-white fw-bold text-center";
307
  clearInterval(interval);
308
  }
309
  }, 2000);
310
  }
 
 
 
 
311
  </script>
 
312
  </body>
313
  </html>