yonigozlan HF Staff commited on
Commit
b23258f
·
1 Parent(s): 6acd792

parse modalities instead of hardcoding them

Browse files
Files changed (1) hide show
  1. app.py +136 -449
app.py CHANGED
@@ -10,12 +10,17 @@ import sys
10
  import time
11
  import webbrowser
12
  from datetime import datetime
13
- from typing import Dict, List, Optional
14
 
15
  from flask import Flask, jsonify, render_template, request
16
 
17
  import transformers
18
 
 
 
 
 
 
19
 
20
  class TransformersTimelineParser:
21
  """Parser for extracting model release dates from Transformers documentation."""
@@ -29,451 +34,133 @@ class TransformersTimelineParser:
29
  transformers_src = os.path.join(os.path.dirname(docs_dir), "..", "..", "src")
30
  if transformers_src not in sys.path:
31
  sys.path.insert(0, transformers_src)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- # Modality definitions with modern color scheme
34
- self.modalities = {
35
- "text": {
36
- "name": "Text Models",
37
- "color": "#F59E0B", # Soft amber
38
- "models": [
39
- "albert",
40
- "apertus",
41
- "arcee",
42
- "bamba",
43
- "bart",
44
- "barthez",
45
- "bartpho",
46
- "bert",
47
- "bert-generation",
48
- "bert-japanese",
49
- "bertweet",
50
- "big_bird",
51
- "bigbird_pegasus",
52
- "biogpt",
53
- "bitnet",
54
- "blenderbot",
55
- "blenderbot-small",
56
- "bloom",
57
- "bort",
58
- "byt5",
59
- "camembert",
60
- "canine",
61
- "codegen",
62
- "code_llama",
63
- "cohere",
64
- "cohere2",
65
- "convbert",
66
- "cpm",
67
- "cpmant",
68
- "ctrl",
69
- "dbrx",
70
- "deberta",
71
- "deberta-v2",
72
- "deepseek_v3",
73
- "dialogpt",
74
- "diffllama",
75
- "distilbert",
76
- "doge",
77
- "dots1",
78
- "dpr",
79
- "electra",
80
- "encoder-decoder",
81
- "ernie",
82
- "ernie4_5",
83
- "ernie4_5_moe",
84
- "ernie_m",
85
- "esm",
86
- "exaone4",
87
- "falcon",
88
- "falcon3",
89
- "falcon_h1",
90
- "falcon_mamba",
91
- "flan-t5",
92
- "flan-ul2",
93
- "flaubert",
94
- "fnet",
95
- "fsmt",
96
- "funnel",
97
- "fuyu",
98
- "gemma",
99
- "gemma2",
100
- "glm",
101
- "glm4",
102
- "glm4_moe",
103
- "openai-gpt",
104
- "gpt_neo",
105
- "gpt_neox",
106
- "gpt_neox_japanese",
107
- "gptj",
108
- "gpt2",
109
- "gpt_bigcode",
110
- "gpt_oss",
111
- "gptsan-japanese",
112
- "gpt-sw3",
113
- "granite",
114
- "granitemoe",
115
- "granitemoehybrid",
116
- "granitemoeshared",
117
- "helium",
118
- "herbert",
119
- "hgnet_v2",
120
- "hunyuan_v1_dense",
121
- "hunyuan_v1_moe",
122
- "ibert",
123
- "jamba",
124
- "jetmoe",
125
- "jukebox",
126
- "led",
127
- "lfm2",
128
- "llama",
129
- "llama2",
130
- "llama3",
131
- "longformer",
132
- "longt5",
133
- "luke",
134
- "m2m_100",
135
- "madlad-400",
136
- "mamba",
137
- "mamba2",
138
- "marian",
139
- "markuplm",
140
- "mbart",
141
- "mega",
142
- "megatron-bert",
143
- "megatron_gpt2",
144
- "minimax",
145
- "ministral",
146
- "mistral",
147
- "mixtral",
148
- "mluke",
149
- "mobilebert",
150
- "modernbert",
151
- "modernbert-decoder",
152
- "mpnet",
153
- "mpt",
154
- "mra",
155
- "mt5",
156
- "mvp",
157
- "myt5",
158
- "nemotron",
159
- "nezha",
160
- "nllb",
161
- "nllb-moe",
162
- "nystromformer",
163
- "olmo",
164
- "olmo2",
165
- "olmo3",
166
- "olmoe",
167
- "open-llama",
168
- "opt",
169
- "pegasus",
170
- "pegasus_x",
171
- "persimmon",
172
- "phi",
173
- "phi3",
174
- "phimoe",
175
- "phobert",
176
- "plbart",
177
- "prophetnet",
178
- "qdqbert",
179
- "qwen2",
180
- "qwen2_moe",
181
- "qwen3",
182
- "qwen3_moe",
183
- "qwen3_next",
184
- "rag",
185
- "realm",
186
- "recurrent_gemma",
187
- "reformer",
188
- "rembert",
189
- "retribert",
190
- "roberta",
191
- "roberta-prelayernorm",
192
- "roc_bert",
193
- "roformer",
194
- "rwkv",
195
- "seed_oss",
196
- "splinter",
197
- "squeezebert",
198
- "stablelm",
199
- "starcoder2",
200
- "switch_transformers",
201
- "t5",
202
- "t5gemma",
203
- "t5v1.1",
204
- "tapex",
205
- "transfo-xl",
206
- "ul2",
207
- "umt5",
208
- "vaultgemma",
209
- "xmod",
210
- "xglm",
211
- "xlm",
212
- "xlm-prophetnet",
213
- "xlm-roberta",
214
- "xlm-roberta-xl",
215
- "xlm-v",
216
- "xlnet",
217
- "xlstm",
218
- "yoso",
219
- "zamba",
220
- "zamba2",
221
- ],
222
- },
223
- "vision": {
224
- "name": "Vision Models",
225
- "color": "#06B6D4", # Soft cyan
226
- "models": [
227
- "aimv2",
228
- "beit",
229
- "bit",
230
- "conditional_detr",
231
- "convnext",
232
- "convnextv2",
233
- "cvt",
234
- "d_fine",
235
- "dab-detr",
236
- "deepseek_v2",
237
- "deepseek_vl",
238
- "deepseek_vl_hybrid",
239
- "deformable_detr",
240
- "deit",
241
- "depth_anything",
242
- "depth_anything_v2",
243
- "depth_pro",
244
- "deta",
245
- "detr",
246
- "dinat",
247
- "dinov2",
248
- "dinov2_with_registers",
249
- "dinov3",
250
- "dit",
251
- "dpt",
252
- "efficientformer",
253
- "efficientloftr",
254
- "efficientnet",
255
- "eomt",
256
- "focalnet",
257
- "glpn",
258
- "hgnet_v2",
259
- "hiera",
260
- "ijepa",
261
- "imagegpt",
262
- "levit",
263
- "lightglue",
264
- "mask2former",
265
- "maskformer",
266
- "mlcd",
267
- "mobilenet_v1",
268
- "mobilenet_v2",
269
- "mobilevit",
270
- "mobilevitv2",
271
- "nat",
272
- "poolformer",
273
- "prompt_depth_anything",
274
- "pvt",
275
- "pvt_v2",
276
- "regnet",
277
- "resnet",
278
- "rt_detr",
279
- "rt_detr_v2",
280
- "segformer",
281
- "seggpt",
282
- "superglue",
283
- "superpoint",
284
- "swiftformer",
285
- "swin",
286
- "swinv2",
287
- "swin2sr",
288
- "table-transformer",
289
- "textnet",
290
- "timm_wrapper",
291
- "upernet",
292
- "van",
293
- "vit",
294
- "vit_hybrid",
295
- "vitdet",
296
- "vit_mae",
297
- "vitmatte",
298
- "vit_msn",
299
- "vitpose",
300
- "yolos",
301
- "zoedepth",
302
- ],
303
- },
304
- "audio": {
305
- "name": "Audio Models",
306
- "color": "#8B5CF6", # Soft purple
307
- "models": [
308
- "audio-spectrogram-transformer",
309
- "bark",
310
- "clap",
311
- "csm",
312
- "dac",
313
- "dia",
314
- "encodec",
315
- "fastspeech2_conformer",
316
- "granite_speech",
317
- "hubert",
318
- "kyutai_speech_to_text",
319
- "mctct",
320
- "mimi",
321
- "mms",
322
- "moonshine",
323
- "moshi",
324
- "musicgen",
325
- "musicgen_melody",
326
- "pop2piano",
327
- "seamless_m4t",
328
- "seamless_m4t_v2",
329
- "sew",
330
- "sew-d",
331
- "speech_to_text",
332
- "speech_to_text_2",
333
- "speecht5",
334
- "unispeech",
335
- "unispeech-sat",
336
- "univnet",
337
- "vits",
338
- "wav2vec2",
339
- "wav2vec2-bert",
340
- "wav2vec2-conformer",
341
- "wav2vec2_phoneme",
342
- "wavlm",
343
- "whisper",
344
- "xcodec",
345
- "xls_r",
346
- "xlsr_wav2vec2",
347
- ],
348
- },
349
- "video": {
350
- "name": "Video Models",
351
- "color": "#EC4899", # Soft pink
352
- "models": ["timesformer", "vjepa2", "videomae", "vivit"],
353
- },
354
- "multimodal": {
355
- "name": "Multimodal Models",
356
- "color": "#10B981", # Soft emerald
357
- "models": [
358
- "align",
359
- "altclip",
360
- "aria",
361
- "aya_vision",
362
- "blip",
363
- "blip-2",
364
- "bridgetower",
365
- "bros",
366
- "chameleon",
367
- "chinese_clip",
368
- "clip",
369
- "clipseg",
370
- "clvp",
371
- "cohere2_vision",
372
- "colpali",
373
- "colqwen2",
374
- "data2vec",
375
- "deplot",
376
- "donut",
377
- "emu3",
378
- "evolla",
379
- "flava",
380
- "florence2",
381
- "gemma3",
382
- "gemma3n",
383
- "git",
384
- "glm4v",
385
- "glm4v_moe",
386
- "got_ocr2",
387
- "granitevision",
388
- "grounding-dino",
389
- "groupvit",
390
- "idefics",
391
- "idefics2",
392
- "idefics3",
393
- "instructblip",
394
- "instructblipvideo",
395
- "internvl",
396
- "janus",
397
- "kosmos-2",
398
- "kosmos2_5",
399
- "layoutlm",
400
- "layoutlmv2",
401
- "layoutlmv3",
402
- "layoutxlm",
403
- "lilt",
404
- "llama4",
405
- "llava",
406
- "llava_next",
407
- "llava_next_video",
408
- "llava_onevision",
409
- "lxmert",
410
- "matcha",
411
- "metaclip_2",
412
- "mgp-str",
413
- "mistral3",
414
- "mllama",
415
- "mm-grounding-dino",
416
- "nougat",
417
- "omdet-turbo",
418
- "oneformer",
419
- "ovis2",
420
- "owlvit",
421
- "owlv2",
422
- "paligemma",
423
- "perceiver",
424
- "perception_lm",
425
- "phi4_multimodal",
426
- "pix2struct",
427
- "pixtral",
428
- "qwen2_5_omni",
429
- "qwen2_5_vl",
430
- "qwen2_audio",
431
- "qwen2_vl",
432
- "qwen3_vl",
433
- "qwen3_vl_moe",
434
- "sam2",
435
- "sam2_video",
436
- "sam",
437
- "sam_hq",
438
- "shieldgemma2",
439
- "siglip",
440
- "siglip2",
441
- "smollm3",
442
- "smolvlm",
443
- "speech-encoder-decoder",
444
- "tapas",
445
- "trocr",
446
- "tvlt",
447
- "tvp",
448
- "udop",
449
- "video_llava",
450
- "vilt",
451
- "vipllava",
452
- "vision-encoder-decoder",
453
- "vision-text-dual-encoder",
454
- "visual_bert",
455
- "voxtral",
456
- "xclip",
457
- ],
458
- },
459
- "reinforcement": {
460
- "name": "Reinforcement Learning",
461
- "color": "#EF4444", # Soft red
462
- "models": ["decision_transformer", "trajectory_transformer"],
463
- },
464
- "timeseries": {
465
- "name": "Time Series Models",
466
- "color": "#F97316", # Soft orange
467
- "models": ["autoformer", "informer", "patchtsmixer", "patchtst", "time_series_transformer", "timesfm"],
468
- },
469
- "graph": {
470
- "name": "Graph Models",
471
- "color": "#6B7280", # Soft gray
472
- "models": ["graphormer"],
473
- },
474
- }
475
-
476
- def get_model_modality(self, model_name: str) -> Dict[str, str]:
477
  """Determine the modality category for a given model."""
478
  for modality_key, modality_info in self.modalities.items():
479
  if model_name in modality_info["models"]:
@@ -481,7 +168,7 @@ class TransformersTimelineParser:
481
  # Default to text if not found (most common)
482
  return {"key": "text", "name": "Text Models", "color": "#F59E0B"}
483
 
484
- def parse_release_date_from_file(self, file_path: str) -> Optional[Dict[str, str]]:
485
  """Parse the release date line from a model documentation file."""
486
  try:
487
  with open(file_path, "r", encoding="utf-8") as f:
@@ -597,7 +284,7 @@ class TransformersTimelineParser:
597
  print(f"Error extracting description: {e}")
598
  return "No description available."
599
 
600
- def load_model_task_mappings(self) -> Dict[str, List[str]]:
601
  """Load model-to-task mappings from transformers auto model mappings."""
602
  if self.tasks_cache:
603
  return self.tasks_cache
@@ -683,7 +370,7 @@ class TransformersTimelineParser:
683
  print(f"❌ Error loading task mappings: {e}")
684
  return {}
685
 
686
- def get_model_tasks(self, model_name: str) -> List[str]:
687
  """Get the list of tasks/pipelines supported by a model."""
688
  if not self.tasks_cache:
689
  self.load_model_task_mappings()
@@ -726,7 +413,7 @@ class TransformersTimelineParser:
726
 
727
  return os.path.basename(file_path).replace(".md", "").replace("_", " ").replace("-", " ").title()
728
 
729
- def parse_all_model_dates(self, force_refresh: bool = False) -> List[Dict[str, str]]:
730
  """Parse release dates from all model documentation files."""
731
  if self.models_cache is not None and not force_refresh:
732
  return self.models_cache
 
10
  import time
11
  import webbrowser
12
  from datetime import datetime
13
+ from typing import Optional
14
 
15
  from flask import Flask, jsonify, render_template, request
16
 
17
  import transformers
18
 
19
+ try:
20
+ import yaml # type: ignore
21
+ except Exception: # pragma: no cover
22
+ yaml = None
23
+
24
 
25
  class TransformersTimelineParser:
26
  """Parser for extracting model release dates from Transformers documentation."""
 
34
  transformers_src = os.path.join(os.path.dirname(docs_dir), "..", "..", "src")
35
  if transformers_src not in sys.path:
36
  sys.path.insert(0, transformers_src)
37
+ # Parse modalities dynamically; no fallback to static definitions
38
+ parsed_modalities = self._parse_modalities_from_toctree()
39
+ if not parsed_modalities:
40
+ raise RuntimeError("Failed to parse modalities from docs toctree (_toctree.yml)")
41
+ self.modalities = parsed_modalities
42
+
43
+ def _parse_modalities_from_toctree(self) -> Optional[dict[str, dict[str, object]]]:
44
+ """Parse model modalities and slugs from docs/source/en/_toctree.yml.
45
+
46
+ Returns a dict with the same schema as self.modalities or None on failure.
47
+ """
48
+
49
+ # Compute toctree path relative to provided docs_dir
50
+ toctree_path = os.path.join(self.docs_dir, "..", "_toctree.yml")
51
+ if not os.path.isfile(toctree_path):
52
+ return None
53
+
54
+ if yaml is None:
55
+ return None
56
+
57
+ with open(toctree_path, "r", encoding="utf-8") as f:
58
+ data = yaml.safe_load(f)
59
+
60
+ if not isinstance(data, list):
61
+ return None
62
+
63
+ # Locate API -> Models
64
+ api_top = None
65
+ for entry in data:
66
+ if isinstance(entry, dict) and entry.get("title") == "API" and entry.get("sections"):
67
+ api_top = entry
68
+ break
69
+ if api_top is None:
70
+
71
+ def _dfs_find_api(node):
72
+ if isinstance(node, dict) and node.get("title") == "API" and node.get("sections"):
73
+ return node
74
+ if isinstance(node, dict):
75
+ for v in node.values():
76
+ found = _dfs_find_api(v)
77
+ if found is not None:
78
+ return found
79
+ if isinstance(node, list):
80
+ for v in node:
81
+ found = _dfs_find_api(v)
82
+ if found is not None:
83
+ return found
84
+ return None
85
+
86
+ api_top = _dfs_find_api(data)
87
+ if api_top is None:
88
+ return None
89
+
90
+ models_top = None
91
+ for sec in api_top.get("sections", []):
92
+ if isinstance(sec, dict) and sec.get("title") == "Models" and sec.get("sections"):
93
+ models_top = sec
94
+ break
95
+ if models_top is None:
96
+
97
+ def _dfs_find_models(node):
98
+ if isinstance(node, dict) and node.get("title") == "Models" and node.get("sections"):
99
+ return node
100
+ if isinstance(node, dict):
101
+ for v in node.values():
102
+ found = _dfs_find_models(v)
103
+ if found is not None:
104
+ return found
105
+ if isinstance(node, list):
106
+ for v in node:
107
+ found = _dfs_find_models(v)
108
+ if found is not None:
109
+ return found
110
+ return None
111
+
112
+ models_top = _dfs_find_models(api_top)
113
+ if models_top is None:
114
+ return None
115
 
116
+ # Helper to extract slugs from a section like "Text models"
117
+ def extract_model_slugs(section_title: str) -> list[str]:
118
+ result: list[str] = []
119
+ for sec in models_top.get("sections", []):
120
+ if isinstance(sec, dict) and sec.get("title") == section_title:
121
+ # Items may be nested under sections -> sections -> list of {local: model_doc/<slug>, title: ...}
122
+ nested = sec.get("sections") or []
123
+ for sub in nested:
124
+ if not isinstance(sub, dict):
125
+ continue
126
+ # Direct list:
127
+ if "local" in sub:
128
+ local = sub.get("local")
129
+ if isinstance(local, str) and local.startswith("model_doc/"):
130
+ result.append(local.split("/", 1)[1])
131
+ # Or deeper nesting
132
+ for leaf in sub.get("sections", []) if isinstance(sub.get("sections"), list) else []:
133
+ local = leaf.get("local")
134
+ if isinstance(local, str) and local.startswith("model_doc/"):
135
+ result.append(local.split("/", 1)[1])
136
+ return result
137
+
138
+ text_models = extract_model_slugs("Text models")
139
+ vision_models = extract_model_slugs("Vision models")
140
+ audio_models = extract_model_slugs("Audio models")
141
+ video_models = extract_model_slugs("Video models")
142
+ multimodal_models = extract_model_slugs("Multimodal models")
143
+ rl_models = extract_model_slugs("Reinforcement learning models")
144
+ ts_models = extract_model_slugs("Time series models")
145
+ graph_models = extract_model_slugs("Graph models")
146
+
147
+ # Basic validation: require at least some categories to be non-empty
148
+ if not any([text_models, vision_models, audio_models, video_models, multimodal_models]):
149
+ return None
150
+
151
+ # Preserve existing names and colors
152
+ return {
153
+ "text": {"name": "Text Models", "color": "#F59E0B", "models": text_models},
154
+ "vision": {"name": "Vision Models", "color": "#06B6D4", "models": vision_models},
155
+ "audio": {"name": "Audio Models", "color": "#8B5CF6", "models": audio_models},
156
+ "video": {"name": "Video Models", "color": "#EC4899", "models": video_models},
157
+ "multimodal": {"name": "Multimodal Models", "color": "#10B981", "models": multimodal_models},
158
+ "reinforcement": {"name": "Reinforcement Learning", "color": "#EF4444", "models": rl_models},
159
+ "timeseries": {"name": "Time Series Models", "color": "#F97316", "models": ts_models},
160
+ "graph": {"name": "Graph Models", "color": "#6B7280", "models": graph_models},
161
+ }
162
+
163
+ def get_model_modality(self, model_name: str) -> dict[str, str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  """Determine the modality category for a given model."""
165
  for modality_key, modality_info in self.modalities.items():
166
  if model_name in modality_info["models"]:
 
168
  # Default to text if not found (most common)
169
  return {"key": "text", "name": "Text Models", "color": "#F59E0B"}
170
 
171
+ def parse_release_date_from_file(self, file_path: str) -> Optional[dict[str, str]]:
172
  """Parse the release date line from a model documentation file."""
173
  try:
174
  with open(file_path, "r", encoding="utf-8") as f:
 
284
  print(f"Error extracting description: {e}")
285
  return "No description available."
286
 
287
+ def load_model_task_mappings(self) -> dict[str, list[str]]:
288
  """Load model-to-task mappings from transformers auto model mappings."""
289
  if self.tasks_cache:
290
  return self.tasks_cache
 
370
  print(f"❌ Error loading task mappings: {e}")
371
  return {}
372
 
373
+ def get_model_tasks(self, model_name: str) -> list[str]:
374
  """Get the list of tasks/pipelines supported by a model."""
375
  if not self.tasks_cache:
376
  self.load_model_task_mappings()
 
413
 
414
  return os.path.basename(file_path).replace(".md", "").replace("_", " ").replace("-", " ").title()
415
 
416
+ def parse_all_model_dates(self, force_refresh: bool = False) -> list[dict[str, str]]:
417
  """Parse release dates from all model documentation files."""
418
  if self.models_cache is not None and not force_refresh:
419
  return self.models_cache