danielrosehill commited on
Commit
62cd7ca
·
1 Parent(s): 5f609a0
Files changed (2) hide show
  1. app.py +372 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ import json
4
+ import pandas as pd
5
+
6
# Load the taxonomy dataset from the Hugging Face Hub.
dataset = load_dataset("danielrosehill/multimodal-ai-taxonomy")

# Build a nested mapping: {modality_key: {operation: parsed_json}}.
# Split names look like "taxonomy_<modality...>_<operation>", where the
# modality portion may itself contain underscores (e.g. "video_generation"),
# so everything except the last segment is treated as the modality.
taxonomy_data = {}
for split_name in dataset.keys():
    if not split_name.startswith("taxonomy_"):
        continue
    parts = split_name.replace("taxonomy_", "").split("_")
    if len(parts) < 3:
        continue

    modality = "_".join(parts[:-1])
    operation = parts[-1]

    # Register the modality even when the split turns out to be empty,
    # mirroring the key-first initialization order of the original code.
    taxonomy_data.setdefault(modality, {})

    split = dataset[split_name]
    if len(split) > 0:
        # Each split stores its payload as a single JSON-encoded row.
        taxonomy_data[modality][operation] = json.loads(split[0]['json'])
27
+
28
# Display metadata for each top-level modality key found in the dataset:
# a human-readable name, the emoji used in headers/cards, and an accent
# color (hex) used for borders and highlighted counts in the generated HTML.
# Keys must match the modality portion of the dataset split names.
MODALITY_INFO = {
    "video_generation": {"name": "Video Generation", "emoji": "🎬", "color": "#FF6B6B"},
    "audio_generation": {"name": "Audio Generation", "emoji": "🎵", "color": "#4ECDC4"},
    "image_generation": {"name": "Image Generation", "emoji": "🖼️", "color": "#95E1D3"},
    "text_generation": {"name": "Text Generation", "emoji": "📝", "color": "#F38181"},
    "3d_generation": {"name": "3D Generation", "emoji": "🎨", "color": "#AA96DA"},
}
36
+
37
# CSS for styling — injected into the Gradio Blocks app via the `css=` kwarg.
# Class names referenced by the HTML builders below:
#   .modality-card / .modality-header / .modality-meta — per-modality cards
#   .badge, .badge-{mature,emerging,experimental}      — maturity pills
#   .stat-box                                          — overview counters
#   .index-card                                        — gradient tile (hover lift)
custom_css = """
.modality-card {
    border: 2px solid #e0e0e0;
    border-radius: 10px;
    padding: 20px;
    margin: 10px 0;
    background: white;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.modality-header {
    font-size: 1.5em;
    font-weight: bold;
    margin-bottom: 10px;
    color: #333;
}
.modality-meta {
    background: #f5f5f5;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}
.badge {
    display: inline-block;
    padding: 4px 12px;
    border-radius: 12px;
    margin: 2px;
    font-size: 0.85em;
    font-weight: 500;
}
.badge-mature { background: #4CAF50; color: white; }
.badge-emerging { background: #FF9800; color: white; }
.badge-experimental { background: #9C27B0; color: white; }
.index-card {
    border: 2px solid #ddd;
    border-radius: 15px;
    padding: 30px;
    margin: 15px;
    text-align: center;
    cursor: pointer;
    transition: all 0.3s;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
}
.index-card:hover {
    transform: translateY(-5px);
    box-shadow: 0 10px 20px rgba(0,0,0,0.2);
}
.stat-box {
    background: #f8f9fa;
    border-radius: 10px;
    padding: 15px;
    margin: 10px;
    text-align: center;
}
"""
93
+
94
def create_modality_card(modality_obj):
    """Render a single taxonomy entry as an HTML card.

    Args:
        modality_obj: One entry from a taxonomy split. Expected keys:
            'name'; 'input' ('primary', optional 'secondary' list);
            'output' ('primary', optional 'audio' flag and 'audioType');
            optional 'characteristics' dict; 'metadata' ('maturityLevel',
            optional 'commonUseCases', 'platforms', 'exampleModels').

    Returns:
        An HTML string suitable for gr.HTML.

    Raises:
        KeyError: if the required keys above are missing.
    """
    # Maturity badge — CSS classes badge-mature/-emerging/-experimental
    maturity = modality_obj['metadata']['maturityLevel']
    badge_class = f"badge badge-{maturity}"

    # Input/Output info
    input_primary = modality_obj['input']['primary']
    input_secondary = modality_obj['input'].get('secondary', [])
    output_primary = modality_obj['output']['primary']

    # Bug fix: this card is rendered with gr.HTML, where Markdown emphasis
    # (**text**) is NOT interpreted and showed up as literal asterisks.
    # Use real <strong> tags throughout instead.
    input_str = f"<strong>Primary:</strong> {input_primary}"
    if input_secondary:
        input_str += f"<br><strong>Secondary:</strong> {', '.join(input_secondary)}"

    # Audio line is only shown when the output carries audio at all.
    audio_info = ""
    if modality_obj['output'].get('audio'):
        audio_type = modality_obj['output'].get('audioType', 'N/A')
        audio_info = f"<br><strong>Audio:</strong> {audio_type}"

    # Characteristics (free-form key/value pairs)
    chars = modality_obj.get('characteristics', {})
    char_items = [f"<strong>{k}:</strong> {v}" for k, v in chars.items()]
    char_str = "<br>".join(char_items) if char_items else "N/A"

    # Use cases, rendered as a bullet list
    use_cases = modality_obj['metadata'].get('commonUseCases', [])
    use_case_str = "<br>• " + "<br>• ".join(use_cases) if use_cases else "N/A"

    # Platforms
    platforms = modality_obj['metadata'].get('platforms', [])
    platform_str = ", ".join(platforms) if platforms else "N/A"

    # Example models
    models = modality_obj['metadata'].get('exampleModels', [])
    model_str = ", ".join(models) if models else "N/A"

    html = f"""
    <div class="modality-card">
        <div class="modality-header">
            {modality_obj['name']}
            <span class="{badge_class}">{maturity}</span>
        </div>

        <div class="modality-meta">
            <p><strong>🔹 Input</strong><br>{input_str}</p>
            <p><strong>🔸 Output</strong><br><strong>Primary:</strong> {output_primary}{audio_info}</p>
        </div>

        <details>
            <summary><strong>📊 Characteristics</strong></summary>
            <div style="margin: 10px; padding: 10px; background: #fafafa; border-radius: 5px;">
                {char_str}
            </div>
        </details>

        <details>
            <summary><strong>💡 Common Use Cases</strong></summary>
            <div style="margin: 10px; padding: 10px; background: #fafafa; border-radius: 5px;">
                {use_case_str}
            </div>
        </details>

        <details>
            <summary><strong>🛠️ Platforms & Models</strong></summary>
            <div style="margin: 10px; padding: 10px; background: #fafafa; border-radius: 5px;">
                <p><strong>Platforms:</strong> {platform_str}</p>
                <p><strong>Example Models:</strong> {model_str}</p>
            </div>
        </details>
    </div>
    """
    return html
170
+
171
def create_overview_page():
    """Build the HTML for the landing/overview tab.

    Summarizes, per modality, how many creation and editing entries the
    taxonomy holds, plus a grand total. Reads the module-level
    ``taxonomy_data`` and ``MODALITY_INFO``.
    """
    boxes = []
    grand_total = 0

    for key, operations in taxonomy_data.items():
        info = MODALITY_INFO.get(key, {"name": key, "emoji": "📦", "color": "#666"})

        n_creation = len(operations.get('creation', {}).get('modalities', []))
        n_editing = len(operations.get('editing', {}).get('modalities', []))
        subtotal = n_creation + n_editing
        grand_total += subtotal

        boxes.append(f"""
    <div class="stat-box" style="border-left: 4px solid {info['color']};">
        <div style="font-size: 2em;">{info['emoji']}</div>
        <div style="font-size: 1.2em; font-weight: bold; margin: 10px 0;">{info['name']}</div>
        <div style="font-size: 0.9em; color: #666;">
            Creation: {n_creation} | Editing: {n_editing}
        </div>
        <div style="font-size: 1.5em; font-weight: bold; color: {info['color']}; margin-top: 10px;">
            {subtotal} modalities
        </div>
    </div>
    """)

    # One flex container wrapping all per-modality stat boxes.
    stats_html = (
        "<div style='display: flex; flex-wrap: wrap; justify-content: space-around;'>"
        + "".join(boxes)
        + "</div>"
    )

    return f"""
    <div style="text-align: center; padding: 30px;">
        <h1>🎯 Multimodal AI Taxonomy</h1>
        <p style="font-size: 1.2em; color: #666; max-width: 800px; margin: 20px auto;">
            A comprehensive taxonomy for multimodal generative AI capabilities, organized by output modality and operation type.
        </p>
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 15px; margin: 20px auto; max-width: 300px;">
            <div style="font-size: 3em; font-weight: bold;">{grand_total}</div>
            <div style="font-size: 1.2em;">Total Modalities</div>
        </div>
    </div>

    {stats_html}

    <div style="margin: 30px; padding: 20px; background: #f0f7ff; border-radius: 10px; border-left: 4px solid #2196F3;">
        <h3>📖 How to Use This Space</h3>
        <p>Navigate through the tabs above to explore different output modalities (Video, Audio, Image, Text, 3D).</p>
        <p>Each modality is organized into <strong>Creation</strong> (generating new content) and <strong>Editing</strong> (modifying existing content) operations.</p>
        <p>Click on the details sections to expand and see characteristics, use cases, platforms, and example models.</p>
    </div>
    """
223
+
224
def create_modality_page(modality_key, operation_type):
    """Render one modality/operation tab: a header banner plus one card
    per taxonomy entry.

    Args:
        modality_key: key into the module-level ``taxonomy_data``.
        operation_type: 'creation' or 'editing'.

    Returns:
        An HTML string (a simple <p> message when data is missing).
    """
    # Guard clauses: fall back to a plain message when data is absent.
    if modality_key not in taxonomy_data:
        return f"<p>No data found for {modality_key}</p>"
    if operation_type not in taxonomy_data[modality_key]:
        return f"<p>No {operation_type} data found for {modality_key}</p>"

    payload = taxonomy_data[modality_key][operation_type]
    entries = payload.get('modalities', [])
    info = MODALITY_INFO.get(modality_key, {"name": modality_key, "emoji": "📦", "color": "#666"})

    # Header banner tinted with the modality's accent color
    # ("22"/"44" suffixes are alpha channels on the hex color).
    header = f"""
    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, {info['color']}22 0%, {info['color']}44 100%); border-radius: 15px; margin-bottom: 20px;">
        <h2>{info['emoji']} {info['name']} - {operation_type.title()}</h2>
        <p style="color: #666;">{payload.get('description', '')}</p>
        <div style="font-size: 1.5em; font-weight: bold; color: {info['color']}; margin-top: 10px;">
            {len(entries)} modalities
        </div>
    </div>
    """

    return header + "".join(create_modality_card(entry) for entry in entries)
252
+
253
def create_comparison_table(modality_key):
    """Flatten a modality's creation and editing entries into a DataFrame.

    Args:
        modality_key: key into the module-level ``taxonomy_data``.

    Returns:
        pandas.DataFrame with one row per entry (empty when the modality
        is unknown), columns: Operation, Name, Primary Input,
        Primary Output, Maturity, Platforms (count).
    """
    if modality_key not in taxonomy_data:
        return pd.DataFrame()

    records = []
    for op in ('creation', 'editing'):
        payload = taxonomy_data[modality_key].get(op)
        if payload is None:
            continue
        for entry in payload.get('modalities', []):
            meta = entry['metadata']
            records.append({
                'Operation': op.title(),
                'Name': entry['name'],
                'Primary Input': entry['input']['primary'],
                'Primary Output': entry['output']['primary'],
                'Maturity': meta['maturityLevel'],
                # Count of platforms, not the names — keeps the table compact.
                'Platforms': len(meta.get('platforms', [])),
            })

    return pd.DataFrame(records)
274
+
275
# --- Gradio interface -------------------------------------------------------
# The five modality tabs were previously five copy-pasted, structurally
# identical blocks; they are now driven by one helper and a label/key table.

def _build_modality_section(modality_key):
    """Emit the Creation/Editing/Comparison sub-tabs for one modality key."""
    with gr.Tabs():
        with gr.Tab("Creation"):
            gr.HTML(create_modality_page(modality_key, "creation"))
        with gr.Tab("Editing"):
            gr.HTML(create_modality_page(modality_key, "editing"))
        with gr.Tab("Comparison"):
            gr.Dataframe(create_comparison_table(modality_key), wrap=True)


# (tab label, taxonomy key) — labels kept identical to the original tabs.
_MODALITY_TABS = [
    ("🎬 Video", "video_generation"),
    ("🎵 Audio", "audio_generation"),
    ("🖼️ Image", "image_generation"),
    ("📝 Text", "text_generation"),
    ("🎨 3D", "3d_generation"),
]

with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:

    gr.Markdown("# 🎯 Multimodal AI Taxonomy Explorer")

    with gr.Tabs():
        # Overview tab
        with gr.Tab("🏠 Overview"):
            gr.HTML(create_overview_page())

        # One tab per output modality
        for _label, _key in _MODALITY_TABS:
            with gr.Tab(_label):
                _build_modality_section(_key)

        # About tab
        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
            ## About This Taxonomy

            This taxonomy provides a structured classification of multimodal AI capabilities, organized by:

            - **Output Modality**: The primary type of content being generated (video, audio, image, text, 3D)
            - **Operation Type**: Whether the task involves creation (from scratch) or editing (modifying existing content)

            ### Key Features

            - **Comprehensive Coverage**: Covers all major multimodal AI capabilities
            - **Structured Metadata**: Each modality includes input/output specs, characteristics, maturity level, use cases, platforms, and example models
            - **Fine-grained Classification**: Goes beyond simple input/output categorization to capture nuanced differences

            ### Data Schema

            Each modality entry includes:
            - Unique identifier and human-readable name
            - Input specifications (primary and secondary modalities)
            - Output specifications (with audio metadata for video outputs)
            - Characteristics (process type, audio handling, motion type, etc.)
            - Metadata (maturity level, use cases, platforms, example models)

            ### Dataset

            This visualization is powered by the [multimodal-ai-taxonomy](https://huggingface.co/datasets/danielrosehill/multimodal-ai-taxonomy) dataset on Hugging Face.

            ### Maturity Levels

            - **Mature**: Well-established, widely available, production-ready
            - **Emerging**: Growing adoption, increasingly stable
            - **Experimental**: Cutting-edge, limited availability, proof-of-concept
            """)

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.49.1
2
+ datasets
3
+ pandas