BrotherTony committed on
Commit
de03dab
·
verified ·
1 Parent(s): 648a1e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +448 -40
app.py CHANGED
@@ -3,87 +3,495 @@ import requests
3
  import random
4
 
5
  def get_random_dataset(min_likes=None, max_likes=None, task=None, language=None):
6
- """Get a single random dataset from HuggingFace with optional filters"""
7
-
8
  try:
9
- # Build API request
10
  url = "https://huggingface.co/api/datasets"
11
  params = {"limit": 100, "full": "true"}
12
 
13
- # Add task filter if provided
14
- if task:
15
- params["filter"] = task
16
 
17
- # Fetch datasets
18
  response = requests.get(url, params=params, timeout=30)
19
  response.raise_for_status()
20
  datasets = response.json()
21
 
22
- # Apply filters
23
  filtered = []
24
  for ds in datasets:
25
  likes = ds.get("likes", 0)
26
 
27
- # Check likes range
28
- if min_likes and likes < min_likes:
29
  continue
30
- if max_likes and likes > max_likes:
31
  continue
32
 
33
- # Check language
34
- if language:
35
  langs = ds.get("cardData", {}).get("language", [])
36
  if isinstance(langs, str):
37
  langs = [langs]
38
- if language not in langs:
39
  continue
40
 
41
  filtered.append(ds)
42
 
43
  if not filtered:
44
- return "No datasets found matching your filters. Try adjusting them."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Pick random dataset
47
  dataset = random.choice(filtered)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- # Format output
50
- result = f"""## {dataset.get('id', 'Unknown')}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- **Likes:** {dataset.get('likes', 0):,}
53
- **Downloads:** {dataset.get('downloads', 0):,}
54
- **Author:** {dataset.get('author', 'Unknown')}
55
 
56
- **Link:** https://huggingface.co/datasets/{dataset.get('id', '')}
 
 
57
 
58
- **Description:**
59
- {dataset.get('cardData', {}).get('description', 'No description available')[:300]}...
 
 
60
 
61
- **Tags:** {', '.join(dataset.get('tags', [])[:8])}
62
- """
63
- return result
64
-
65
- except Exception as e:
66
- return f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
 
 
 
 
 
 
68
 
69
- with gr.Blocks(title="Random HF Dataset Picker") as app:
70
- gr.Markdown("#Random HuggingFace Dataset Picker")
71
- gr.Markdown("Pick a completely random dataset with optional filters")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- with gr.Row():
74
- min_likes = gr.Number(label="Min Likes (optional)", precision=0)
75
- max_likes = gr.Number(label="Max Likes (optional)", precision=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- task = gr.Textbox(label="Task (optional)", placeholder="e.g., text-classification")
78
- language = gr.Textbox(label="Language Code (optional)", placeholder="e.g., en")
79
 
80
- btn = gr.Button("Pick Random Dataset", variant="primary")
81
- output = gr.Markdown()
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  btn.click(
84
  fn=get_random_dataset,
85
  inputs=[min_likes, max_likes, task, language],
86
  outputs=output
87
  )
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  app.launch()
 
3
  import random
4
 
def get_random_dataset(min_likes=None, max_likes=None, task=None, language=None):
    """Pick one random Hugging Face dataset and return it as an HTML card.

    A single request is made to the Hub's ``/api/datasets`` endpoint with
    ``limit=100``, so the "random" pick is drawn from at most the 100
    datasets that one call returns, after the local filters are applied.

    Args:
        min_likes: Lower bound (inclusive) on the like count, or ``None``
            for no lower bound. ``0`` is honoured as a real bound.
        max_likes: Upper bound (inclusive) on the like count, or ``None``
            for no upper bound.
        task: Optional string sent to the API as the ``filter`` query
            parameter; blank / whitespace-only values are ignored.
        language: Optional language code that must appear in the dataset's
            ``cardData["language"]``; blank values are ignored.

    Returns:
        An HTML string: a dataset card on success, an "empty state" card when
        nothing matches, or an "error state" card if anything raises. This
        function never propagates exceptions because its output is rendered
        directly by the UI.
    """
    try:
        params = {"limit": 100, "full": "true"}

        # Strip once so whitespace-only input counts as "no filter".
        task_filter = task.strip() if task else ""
        if task_filter:
            params["filter"] = task_filter

        response = requests.get(
            "https://huggingface.co/api/datasets", params=params, timeout=30
        )
        response.raise_for_status()
        datasets = response.json()

        wanted_language = language.strip() if language else ""
        candidates = [
            ds
            for ds in datasets
            if _dataset_matches(ds, min_likes, max_likes, wanted_language)
        ]
        if not candidates:
            return _render_empty_state()

        return _render_dataset_card(random.choice(candidates))

    except Exception as e:
        # Top-level UI boundary: show the failure in the page instead of
        # letting the callback raise.
        return _render_error_state(e)


def _dataset_matches(ds, min_likes, max_likes, language):
    """Return True if dataset record *ds* passes the likes and language filters."""
    # The field may be absent or null; treat both as zero likes.
    likes = ds.get("likes") or 0
    if min_likes is not None and likes < min_likes:
        return False
    if max_likes is not None and likes > max_likes:
        return False

    if language:
        # "cardData" and "language" can each be missing or null, and
        # "language" can be either a single string or a list of strings.
        langs = (ds.get("cardData") or {}).get("language") or []
        if isinstance(langs, str):
            langs = [langs]
        if language not in langs:
            return False

    return True


def _render_empty_state():
    """Return the static HTML shown when no dataset matches the filters."""
    # NOTE(review): browsers do not run <script> elements inserted through
    # innerHTML; confirm whether the HTML output component executes this.
    return """
    <div class='result-container empty-state'>
        <div class='empty-icon'>
            <svg width="64" height="64" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
                <circle cx="12" cy="12" r="10"/>
                <path d="M12 6v6l4 2"/>
            </svg>
        </div>
        <h3>No Results Found</h3>
        <p>Try adjusting your filters to discover datasets</p>
    </div>
    <script>
        new Audio('https://assets.mixkit.co/active_storage/sfx/2572/2572-preview.mp3').play().catch(() => {});
    </script>
    """


def _render_dataset_card(dataset):
    """Return the HTML card for one dataset record, escaping all remote text."""
    from html import escape
    from urllib.parse import quote

    dataset_id = str(dataset.get('id') or 'Unknown')
    author = str(dataset.get('author') or 'Unknown')
    likes = dataset.get('likes') or 0
    downloads = dataset.get('downloads') or 0
    card = dataset.get('cardData') or {}
    description = str(card.get('description') or 'No description available')
    tags = (dataset.get('tags') or [])[:6]

    # Truncate first, then escape, so an HTML entity is never cut in half.
    summary = description[:400] + ('...' if len(description) > 400 else '')

    tags_html = ''.join(
        f"<span class='tag'>{escape(str(tag))}</span>" for tag in tags
    )
    if not tags_html:
        tags_html = '<span class="no-tags">No tags available</span>'

    # Percent-encode the id for the href; "/" separates owner and name.
    dataset_url = f"https://huggingface.co/datasets/{quote(dataset_id, safe='/')}"

    return f"""
    <div class='result-container success-state'>
        <div class='result-header'>
            <h2 class='dataset-title'>{escape(dataset_id)}</h2>
            <p class='dataset-author'>by {escape(author)}</p>
        </div>

        <div class='stats-grid'>
            <div class='stat-card stat-primary'>
                <div class='stat-label'>Likes</div>
                <div class='stat-value'>{likes:,}</div>
            </div>
            <div class='stat-card stat-secondary'>
                <div class='stat-label'>Downloads</div>
                <div class='stat-value'>{downloads:,}</div>
            </div>
        </div>

        <div class='description-section'>
            <h3>Description</h3>
            <p>{escape(summary)}</p>
        </div>

        <div class='tags-section'>
            <h3>Tags</h3>
            <div class='tags-container'>{tags_html}</div>
        </div>

        <a href='{dataset_url}' target='_blank' class='view-button'>
            View on HuggingFace
            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                <path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/>
                <polyline points="15 3 21 3 21 9"/>
                <line x1="10" y1="14" x2="21" y2="3"/>
            </svg>
        </a>
    </div>
    <script>
        new Audio('https://assets.mixkit.co/active_storage/sfx/2570/2570-preview.mp3').play().catch(() => {{}});
    </script>
    """


def _render_error_state(error):
    """Return the HTML error card for *error*, with its message escaped."""
    from html import escape

    return f"""
    <div class='result-container error-state'>
        <div class='error-icon'>
            <svg width="64" height="64" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
                <circle cx="12" cy="12" r="10"/>
                <line x1="15" y1="9" x2="9" y2="15"/>
                <line x1="9" y1="9" x2="15" y2="15"/>
            </svg>
        </div>
        <h3>Error</h3>
        <p>{escape(str(error))}</p>
    </div>
    <script>
        new Audio('https://assets.mixkit.co/active_storage/sfx/2573/2573-preview.mp3').play().catch(() => {{}});
    </script>
    """

# Stylesheet passed to gr.Blocks(css=...). It styles the page header, the
# filter inputs, the primary button, and the class names used by the HTML
# that get_random_dataset returns (result-container, empty/error/success
# states, stats grid, tags, view button) plus the footer note.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

* {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
}

.gradio-container {
    max-width: 920px !important;
    margin: 0 auto !important;
}

body {
    background: #FAFBFC;
}

.header-section {
    text-align: center;
    padding: 56px 24px 40px;
    background: linear-gradient(135deg, #1E3A8A 0%, #1E40AF 100%);
    border-radius: 20px;
    margin-bottom: 40px;
    position: relative;
    overflow: hidden;
}

.header-section::before {
    content: '';
    position: absolute;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background: radial-gradient(circle at 30% 50%, rgba(59, 130, 246, 0.2) 0%, transparent 50%),
                radial-gradient(circle at 70% 50%, rgba(147, 51, 234, 0.2) 0%, transparent 50%);
    pointer-events: none;
}

.main-title {
    color: white;
    font-size: 42px;
    font-weight: 700;
    margin: 0 0 12px 0;
    letter-spacing: -1px;
    position: relative;
    z-index: 1;
}

.main-subtitle {
    color: rgba(255, 255, 255, 0.85);
    font-size: 17px;
    margin: 0;
    font-weight: 400;
    position: relative;
    z-index: 1;
}

.filter-container {
    background: white;
    padding: 32px;
    border-radius: 16px;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
    margin-bottom: 24px;
    border: 1px solid #E5E7EB;
}

.filter-label {
    color: #1F2937;
    font-size: 16px;
    font-weight: 600;
    margin-bottom: 24px;
    display: block;
}

label {
    color: #374151 !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    margin-bottom: 8px !important;
}

input[type="number"], input[type="text"] {
    border: 1.5px solid #D1D5DB !important;
    border-radius: 10px !important;
    padding: 12px 14px !important;
    font-size: 15px !important;
    transition: all 0.2s ease !important;
    background: white !important;
}

input[type="number"]:focus, input[type="text"]:focus {
    border-color: #3B82F6 !important;
    box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1) !important;
    outline: none !important;
}

button.primary {
    background: linear-gradient(135deg, #1E3A8A 0%, #1E40AF 100%) !important;
    border: none !important;
    color: white !important;
    font-size: 16px !important;
    font-weight: 600 !important;
    padding: 16px 40px !important;
    border-radius: 12px !important;
    cursor: pointer !important;
    transition: all 0.25s cubic-bezier(0.4, 0, 0.2, 1) !important;
    box-shadow: 0 4px 12px rgba(30, 58, 138, 0.3) !important;
    width: 100% !important;
    margin-top: 8px !important;
}

button.primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 20px rgba(30, 58, 138, 0.4) !important;
}

button.primary:active {
    transform: translateY(0px) !important;
}

.result-container {
    background: white;
    border-radius: 16px;
    padding: 40px;
    margin-top: 24px;
    border: 1px solid #E5E7EB;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
}

.empty-state, .error-state {
    text-align: center;
    padding: 60px 40px;
}

.empty-icon, .error-icon {
    color: #9CA3AF;
    margin-bottom: 20px;
    display: flex;
    justify-content: center;
}

.empty-state h3, .error-state h3 {
    color: #1F2937;
    font-size: 20px;
    font-weight: 600;
    margin: 0 0 8px 0;
}

.empty-state p, .error-state p {
    color: #6B7280;
    font-size: 15px;
    margin: 0;
}

.success-state {
    animation: slideIn 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}

@keyframes slideIn {
    from {
        opacity: 0;
        transform: translateY(20px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

.result-header {
    border-bottom: 1px solid #F3F4F6;
    padding-bottom: 24px;
    margin-bottom: 28px;
}

.dataset-title {
    color: #111827;
    margin: 0 0 8px 0;
    font-size: 26px;
    font-weight: 700;
    letter-spacing: -0.5px;
}

.dataset-author {
    color: #6B7280;
    margin: 0;
    font-size: 15px;
    font-weight: 500;
}

.stats-grid {
    display: grid;
    grid-template-columns: repeat(2, 1fr);
    gap: 16px;
    margin-bottom: 32px;
}

.stat-card {
    padding: 20px;
    border-radius: 12px;
    text-align: center;
    transition: transform 0.2s ease;
}

.stat-card:hover {
    transform: translateY(-2px);
}

.stat-primary {
    background: linear-gradient(135deg, #1E3A8A 0%, #1E40AF 100%);
}

.stat-secondary {
    background: linear-gradient(135deg, #0891B2 0%, #06B6D4 100%);
}

.stat-label {
    color: rgba(255, 255, 255, 0.85);
    font-size: 13px;
    margin-bottom: 6px;
    font-weight: 500;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.stat-value {
    color: white;
    font-size: 28px;
    font-weight: 700;
}

.description-section, .tags-section {
    margin-bottom: 28px;
}

.description-section h3, .tags-section h3 {
    color: #111827;
    font-size: 16px;
    font-weight: 600;
    margin: 0 0 12px 0;
}

.description-section p {
    color: #4B5563;
    line-height: 1.7;
    font-size: 15px;
    margin: 0;
}

.tags-container {
    display: flex;
    flex-wrap: wrap;
    gap: 8px;
}

.tag {
    display: inline-block;
    background: #EFF6FF;
    color: #1E40AF;
    padding: 6px 14px;
    border-radius: 20px;
    font-size: 13px;
    font-weight: 500;
    border: 1px solid #DBEAFE;
}

.no-tags {
    color: #9CA3AF;
    font-size: 14px;
}

.view-button {
    display: inline-flex;
    align-items: center;
    gap: 8px;
    background: linear-gradient(135deg, #1E3A8A 0%, #1E40AF 100%);
    color: white;
    padding: 14px 28px;
    border-radius: 10px;
    text-decoration: none;
    font-weight: 600;
    font-size: 15px;
    transition: all 0.25s cubic-bezier(0.4, 0, 0.2, 1);
    box-shadow: 0 2px 8px rgba(30, 58, 138, 0.25);
}

.view-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(30, 58, 138, 0.35);
    text-decoration: none;
}

.footer-note {
    text-align: center;
    color: #6B7280;
    font-size: 14px;
    margin-top: 32px;
    padding: 20px;
    font-weight: 400;
}
"""

# ---------------------------------------------------------------------------
# Gradio UI: header, four optional filter inputs, a button wired to
# get_random_dataset, an HTML result pane, and a footer.
# ---------------------------------------------------------------------------

# Static HTML fragments, named so the layout below reads top to bottom.
_HEADER_HTML = """
    <div class='header-section'>
        <h1 class='main-title'>Random Dataset Picker</h1>
        <p class='main-subtitle'>Discover datasets from HuggingFace with customisable filters</p>
    </div>
    """

_FILTER_HEADING_HTML = "<div class='filter-container'><span class='filter-label'>Filter Options</span></div>"

# Placeholder shown in the result pane before the first click.
_INITIAL_RESULT_HTML = """<div class='result-container empty-state'>
        <div class='empty-icon'>
            <svg width="64" height="64" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
                <path d="M21 16V8a2 2 0 0 0-1-1.73l-7-4a2 2 0 0 0-2 0l-7 4A2 2 0 0 0 3 8v8a2 2 0 0 0 1 1.73l7 4a2 2 0 0 0 2 0l7-4A2 2 0 0 0 21 16z"/>
                <polyline points="3.27 6.96 12 12.01 20.73 6.96"/>
                <line x1="12" y1="22.08" x2="12" y2="12"/>
            </svg>
        </div>
        <h3>Ready to Explore</h3>
        <p>Click the button above to discover a random dataset</p>
    </div>"""

_FOOTER_HTML = "<div class='footer-note'>All filters are optional — Leave empty for completely random selection</div>"

# NOTE(review): browsers do not run <script> elements inserted through
# innerHTML; confirm whether gr.HTML actually executes this click handler.
_CLICK_SOUND_HTML = """
    <script>
        document.querySelector('.generate-btn').addEventListener('click', function() {
            const audio = new Audio('https://assets.mixkit.co/active_storage/sfx/2568/2568-preview.mp3');
            audio.play().catch(() => {});
        });
    </script>
    """

# Keyword arguments shared by the paired inputs in each row.
_likes_field = {"precision": 0, "container": True, "scale": 1}
_text_field = {"container": True, "scale": 1}

with gr.Blocks(css=custom_css, title="Random Dataset Picker", theme=gr.themes.Soft()) as app:
    gr.HTML(_HEADER_HTML)

    with gr.Group():
        gr.HTML(_FILTER_HEADING_HTML)

        with gr.Row():
            min_likes = gr.Number(label="Minimum Likes", **_likes_field)
            max_likes = gr.Number(label="Maximum Likes", **_likes_field)

        with gr.Row():
            task = gr.Textbox(
                label="Task Category",
                placeholder="e.g., text-classification, image-classification",
                **_text_field,
            )
            language = gr.Textbox(
                label="Language Code",
                placeholder="e.g., en, fr, de, zh",
                **_text_field,
            )

    btn = gr.Button(
        value="Generate Random Dataset",
        variant="primary",
        size="lg",
        elem_classes=["generate-btn"],
    )

    output = gr.HTML(value=_INITIAL_RESULT_HTML)

    # The input order must match get_random_dataset's positional parameters.
    btn.click(
        get_random_dataset,
        [min_likes, max_likes, task, language],
        output,
    )

    gr.HTML(_FOOTER_HTML)

    gr.HTML(_CLICK_SOUND_HTML)

app.launch()