yonigozlan HF Staff commited on
Commit
b23258f
·
1 Parent(s): 6acd792

parse modalities instead of hardcoding them

Browse files
Files changed (1) hide show
  1. app.py +136 -449
app.py CHANGED
@@ -10,12 +10,17 @@ import sys
10
  import time
11
  import webbrowser
12
  from datetime import datetime
13
- from typing import Dict, List, Optional
14
 
15
  from flask import Flask, jsonify, render_template, request
16
 
17
  import transformers
18
 
 
 
 
 
 
19
 
20
  class TransformersTimelineParser:
21
  """Parser for extracting model release dates from Transformers documentation."""
@@ -29,451 +34,133 @@ class TransformersTimelineParser:
29
  transformers_src = os.path.join(os.path.dirname(docs_dir), "..", "..", "src")
30
  if transformers_src not in sys.path:
31
  sys.path.insert(0, transformers_src)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- # Modality definitions with modern color scheme
34
- self.modalities = {
35
- "text": {
36
- "name": "Text Models",
37
- "color": "#F59E0B", # Soft amber
38
- "models": [
39
- "albert",
40
- "apertus",
41
- "arcee",
42
- "bamba",
43
- "bart",
44
- "barthez",
45
- "bartpho",
46
- "bert",
47
- "bert-generation",
48
- "bert-japanese",
49
- "bertweet",
50
- "big_bird",
51
- "bigbird_pegasus",
52
- "biogpt",
53
- "bitnet",
54
- "blenderbot",
55
- "blenderbot-small",
56
- "bloom",
57
- "bort",
58
- "byt5",
59
- "camembert",
60
- "canine",
61
- "codegen",
62
- "code_llama",
63
- "cohere",
64
- "cohere2",
65
- "convbert",
66
- "cpm",
67
- "cpmant",
68
- "ctrl",
69
- "dbrx",
70
- "deberta",
71
- "deberta-v2",
72
- "deepseek_v3",
73
- "dialogpt",
74
- "diffllama",
75
- "distilbert",
76
- "doge",
77
- "dots1",
78
- "dpr",
79
- "electra",
80
- "encoder-decoder",
81
- "ernie",
82
- "ernie4_5",
83
- "ernie4_5_moe",
84
- "ernie_m",
85
- "esm",
86
- "exaone4",
87
- "falcon",
88
- "falcon3",
89
- "falcon_h1",
90
- "falcon_mamba",
91
- "flan-t5",
92
- "flan-ul2",
93
- "flaubert",
94
- "fnet",
95
- "fsmt",
96
- "funnel",
97
- "fuyu",
98
- "gemma",
99
- "gemma2",
100
- "glm",
101
- "glm4",
102
- "glm4_moe",
103
- "openai-gpt",
104
- "gpt_neo",
105
- "gpt_neox",
106
- "gpt_neox_japanese",
107
- "gptj",
108
- "gpt2",
109
- "gpt_bigcode",
110
- "gpt_oss",
111
- "gptsan-japanese",
112
- "gpt-sw3",
113
- "granite",
114
- "granitemoe",
115
- "granitemoehybrid",
116
- "granitemoeshared",
117
- "helium",
118
- "herbert",
119
- "hgnet_v2",
120
- "hunyuan_v1_dense",
121
- "hunyuan_v1_moe",
122
- "ibert",
123
- "jamba",
124
- "jetmoe",
125
- "jukebox",
126
- "led",
127
- "lfm2",
128
- "llama",
129
- "llama2",
130
- "llama3",
131
- "longformer",
132
- "longt5",
133
- "luke",
134
- "m2m_100",
135
- "madlad-400",
136
- "mamba",
137
- "mamba2",
138
- "marian",
139
- "markuplm",
140
- "mbart",
141
- "mega",
142
- "megatron-bert",
143
- "megatron_gpt2",
144
- "minimax",
145
- "ministral",
146
- "mistral",
147
- "mixtral",
148
- "mluke",
149
- "mobilebert",
150
- "modernbert",
151
- "modernbert-decoder",
152
- "mpnet",
153
- "mpt",
154
- "mra",
155
- "mt5",
156
- "mvp",
157
- "myt5",
158
- "nemotron",
159
- "nezha",
160
- "nllb",
161
- "nllb-moe",
162
- "nystromformer",
163
- "olmo",
164
- "olmo2",
165
- "olmo3",
166
- "olmoe",
167
- "open-llama",
168
- "opt",
169
- "pegasus",
170
- "pegasus_x",
171
- "persimmon",
172
- "phi",
173
- "phi3",
174
- "phimoe",
175
- "phobert",
176
- "plbart",
177
- "prophetnet",
178
- "qdqbert",
179
- "qwen2",
180
- "qwen2_moe",
181
- "qwen3",
182
- "qwen3_moe",
183
- "qwen3_next",
184
- "rag",
185
- "realm",
186
- "recurrent_gemma",
187
- "reformer",
188
- "rembert",
189
- "retribert",
190
- "roberta",
191
- "roberta-prelayernorm",
192
- "roc_bert",
193
- "roformer",
194
- "rwkv",
195
- "seed_oss",
196
- "splinter",
197
- "squeezebert",
198
- "stablelm",
199
- "starcoder2",
200
- "switch_transformers",
201
- "t5",
202
- "t5gemma",
203
- "t5v1.1",
204
- "tapex",
205
- "transfo-xl",
206
- "ul2",
207
- "umt5",
208
- "vaultgemma",
209
- "xmod",
210
- "xglm",
211
- "xlm",
212
- "xlm-prophetnet",
213
- "xlm-roberta",
214
- "xlm-roberta-xl",
215
- "xlm-v",
216
- "xlnet",
217
- "xlstm",
218
- "yoso",
219
- "zamba",
220
- "zamba2",
221
- ],
222
- },
223
- "vision": {
224
- "name": "Vision Models",
225
- "color": "#06B6D4", # Soft cyan
226
- "models": [
227
- "aimv2",
228
- "beit",
229
- "bit",
230
- "conditional_detr",
231
- "convnext",
232
- "convnextv2",
233
- "cvt",
234
- "d_fine",
235
- "dab-detr",
236
- "deepseek_v2",
237
- "deepseek_vl",
238
- "deepseek_vl_hybrid",
239
- "deformable_detr",
240
- "deit",
241
- "depth_anything",
242
- "depth_anything_v2",
243
- "depth_pro",
244
- "deta",
245
- "detr",
246
- "dinat",
247
- "dinov2",
248
- "dinov2_with_registers",
249
- "dinov3",
250
- "dit",
251
- "dpt",
252
- "efficientformer",
253
- "efficientloftr",
254
- "efficientnet",
255
- "eomt",
256
- "focalnet",
257
- "glpn",
258
- "hgnet_v2",
259
- "hiera",
260
- "ijepa",
261
- "imagegpt",
262
- "levit",
263
- "lightglue",
264
- "mask2former",
265
- "maskformer",
266
- "mlcd",
267
- "mobilenet_v1",
268
- "mobilenet_v2",
269
- "mobilevit",
270
- "mobilevitv2",
271
- "nat",
272
- "poolformer",
273
- "prompt_depth_anything",
274
- "pvt",
275
- "pvt_v2",
276
- "regnet",
277
- "resnet",
278
- "rt_detr",
279
- "rt_detr_v2",
280
- "segformer",
281
- "seggpt",
282
- "superglue",
283
- "superpoint",
284
- "swiftformer",
285
- "swin",
286
- "swinv2",
287
- "swin2sr",
288
- "table-transformer",
289
- "textnet",
290
- "timm_wrapper",
291
- "upernet",
292
- "van",
293
- "vit",
294
- "vit_hybrid",
295
- "vitdet",
296
- "vit_mae",
297
- "vitmatte",
298
- "vit_msn",
299
- "vitpose",
300
- "yolos",
301
- "zoedepth",
302
- ],
303
- },
304
- "audio": {
305
- "name": "Audio Models",
306
- "color": "#8B5CF6", # Soft purple
307
- "models": [
308
- "audio-spectrogram-transformer",
309
- "bark",
310
- "clap",
311
- "csm",
312
- "dac",
313
- "dia",
314
- "encodec",
315
- "fastspeech2_conformer",
316
- "granite_speech",
317
- "hubert",
318
- "kyutai_speech_to_text",
319
- "mctct",
320
- "mimi",
321
- "mms",
322
- "moonshine",
323
- "moshi",
324
- "musicgen",
325
- "musicgen_melody",
326
- "pop2piano",
327
- "seamless_m4t",
328
- "seamless_m4t_v2",
329
- "sew",
330
- "sew-d",
331
- "speech_to_text",
332
- "speech_to_text_2",
333
- "speecht5",
334
- "unispeech",
335
- "unispeech-sat",
336
- "univnet",
337
- "vits",
338
- "wav2vec2",
339
- "wav2vec2-bert",
340
- "wav2vec2-conformer",
341
- "wav2vec2_phoneme",
342
- "wavlm",
343
- "whisper",
344
- "xcodec",
345
- "xls_r",
346
- "xlsr_wav2vec2",
347
- ],
348
- },
349
- "video": {
350
- "name": "Video Models",
351
- "color": "#EC4899", # Soft pink
352
- "models": ["timesformer", "vjepa2", "videomae", "vivit"],
353
- },
354
- "multimodal": {
355
- "name": "Multimodal Models",
356
- "color": "#10B981", # Soft emerald
357
- "models": [
358
- "align",
359
- "altclip",
360
- "aria",
361
- "aya_vision",
362
- "blip",
363
- "blip-2",
364
- "bridgetower",
365
- "bros",
366
- "chameleon",
367
- "chinese_clip",
368
- "clip",
369
- "clipseg",
370
- "clvp",
371
- "cohere2_vision",
372
- "colpali",
373
- "colqwen2",
374
- "data2vec",
375
- "deplot",
376
- "donut",
377
- "emu3",
378
- "evolla",
379
- "flava",
380
- "florence2",
381
- "gemma3",
382
- "gemma3n",
383
- "git",
384
- "glm4v",
385
- "glm4v_moe",
386
- "got_ocr2",
387
- "granitevision",
388
- "grounding-dino",
389
- "groupvit",
390
- "idefics",
391
- "idefics2",
392
- "idefics3",
393
- "instructblip",
394
- "instructblipvideo",
395
- "internvl",
396
- "janus",
397
- "kosmos-2",
398
- "kosmos2_5",
399
- "layoutlm",
400
- "layoutlmv2",
401
- "layoutlmv3",
402
- "layoutxlm",
403
- "lilt",
404
- "llama4",
405
- "llava",
406
- "llava_next",
407
- "llava_next_video",
408
- "llava_onevision",
409
- "lxmert",
410
- "matcha",
411
- "metaclip_2",
412
- "mgp-str",
413
- "mistral3",
414
- "mllama",
415
- "mm-grounding-dino",
416
- "nougat",
417
- "omdet-turbo",
418
- "oneformer",
419
- "ovis2",
420
- "owlvit",
421
- "owlv2",
422
- "paligemma",
423
- "perceiver",
424
- "perception_lm",
425
- "phi4_multimodal",
426
- "pix2struct",
427
- "pixtral",
428
- "qwen2_5_omni",
429
- "qwen2_5_vl",
430
- "qwen2_audio",
431
- "qwen2_vl",
432
- "qwen3_vl",
433
- "qwen3_vl_moe",
434
- "sam2",
435
- "sam2_video",
436
- "sam",
437
- "sam_hq",
438
- "shieldgemma2",
439
- "siglip",
440
- "siglip2",
441
- "smollm3",
442
- "smolvlm",
443
- "speech-encoder-decoder",
444
- "tapas",
445
- "trocr",
446
- "tvlt",
447
- "tvp",
448
- "udop",
449
- "video_llava",
450
- "vilt",
451
- "vipllava",
452
- "vision-encoder-decoder",
453
- "vision-text-dual-encoder",
454
- "visual_bert",
455
- "voxtral",
456
- "xclip",
457
- ],
458
- },
459
- "reinforcement": {
460
- "name": "Reinforcement Learning",
461
- "color": "#EF4444", # Soft red
462
- "models": ["decision_transformer", "trajectory_transformer"],
463
- },
464
- "timeseries": {
465
- "name": "Time Series Models",
466
- "color": "#F97316", # Soft orange
467
- "models": ["autoformer", "informer", "patchtsmixer", "patchtst", "time_series_transformer", "timesfm"],
468
- },
469
- "graph": {
470
- "name": "Graph Models",
471
- "color": "#6B7280", # Soft gray
472
- "models": ["graphormer"],
473
- },
474
- }
475
-
476
- def get_model_modality(self, model_name: str) -> Dict[str, str]:
477
  """Determine the modality category for a given model."""
478
  for modality_key, modality_info in self.modalities.items():
479
  if model_name in modality_info["models"]:
@@ -481,7 +168,7 @@ class TransformersTimelineParser:
481
  # Default to text if not found (most common)
482
  return {"key": "text", "name": "Text Models", "color": "#F59E0B"}
483
 
484
- def parse_release_date_from_file(self, file_path: str) -> Optional[Dict[str, str]]:
485
  """Parse the release date line from a model documentation file."""
486
  try:
487
  with open(file_path, "r", encoding="utf-8") as f:
@@ -597,7 +284,7 @@ class TransformersTimelineParser:
597
  print(f"Error extracting description: {e}")
598
  return "No description available."
599
 
600
- def load_model_task_mappings(self) -> Dict[str, List[str]]:
601
  """Load model-to-task mappings from transformers auto model mappings."""
602
  if self.tasks_cache:
603
  return self.tasks_cache
@@ -683,7 +370,7 @@ class TransformersTimelineParser:
683
  print(f"❌ Error loading task mappings: {e}")
684
  return {}
685
 
686
- def get_model_tasks(self, model_name: str) -> List[str]:
687
  """Get the list of tasks/pipelines supported by a model."""
688
  if not self.tasks_cache:
689
  self.load_model_task_mappings()
@@ -726,7 +413,7 @@ class TransformersTimelineParser:
726
 
727
  return os.path.basename(file_path).replace(".md", "").replace("_", " ").replace("-", " ").title()
728
 
729
- def parse_all_model_dates(self, force_refresh: bool = False) -> List[Dict[str, str]]:
730
  """Parse release dates from all model documentation files."""
731
  if self.models_cache is not None and not force_refresh:
732
  return self.models_cache
 
10
  import time
11
  import webbrowser
12
  from datetime import datetime
13
+ from typing import Optional
14
 
15
  from flask import Flask, jsonify, render_template, request
16
 
17
  import transformers
18
 
19
+ try:
20
+ import yaml # type: ignore
21
+ except Exception: # pragma: no cover
22
+ yaml = None
23
+
24
 
25
  class TransformersTimelineParser:
26
  """Parser for extracting model release dates from Transformers documentation."""
 
34
  transformers_src = os.path.join(os.path.dirname(docs_dir), "..", "..", "src")
35
  if transformers_src not in sys.path:
36
  sys.path.insert(0, transformers_src)
37
+ # Parse modalities dynamically; no fallback to static definitions
38
+ parsed_modalities = self._parse_modalities_from_toctree()
39
+ if not parsed_modalities:
40
+ raise RuntimeError("Failed to parse modalities from docs toctree (_toctree.yml)")
41
+ self.modalities = parsed_modalities
42
+
43
+ def _parse_modalities_from_toctree(self) -> Optional[dict[str, dict[str, object]]]:
44
+ """Parse model modalities and slugs from docs/source/en/_toctree.yml.
45
+
46
+ Returns a dict with the same schema as self.modalities or None on failure.
47
+ """
48
+
49
+ # Compute toctree path relative to provided docs_dir
50
+ toctree_path = os.path.join(self.docs_dir, "..", "_toctree.yml")
51
+ if not os.path.isfile(toctree_path):
52
+ return None
53
+
54
+ if yaml is None:
55
+ return None
56
+
57
+ with open(toctree_path, "r", encoding="utf-8") as f:
58
+ data = yaml.safe_load(f)
59
+
60
+ if not isinstance(data, list):
61
+ return None
62
+
63
+ # Locate API -> Models
64
+ api_top = None
65
+ for entry in data:
66
+ if isinstance(entry, dict) and entry.get("title") == "API" and entry.get("sections"):
67
+ api_top = entry
68
+ break
69
+ if api_top is None:
70
+
71
+ def _dfs_find_api(node):
72
+ if isinstance(node, dict) and node.get("title") == "API" and node.get("sections"):
73
+ return node
74
+ if isinstance(node, dict):
75
+ for v in node.values():
76
+ found = _dfs_find_api(v)
77
+ if found is not None:
78
+ return found
79
+ if isinstance(node, list):
80
+ for v in node:
81
+ found = _dfs_find_api(v)
82
+ if found is not None:
83
+ return found
84
+ return None
85
+
86
+ api_top = _dfs_find_api(data)
87
+ if api_top is None:
88
+ return None
89
+
90
+ models_top = None
91
+ for sec in api_top.get("sections", []):
92
+ if isinstance(sec, dict) and sec.get("title") == "Models" and sec.get("sections"):
93
+ models_top = sec
94
+ break
95
+ if models_top is None:
96
+
97
+ def _dfs_find_models(node):
98
+ if isinstance(node, dict) and node.get("title") == "Models" and node.get("sections"):
99
+ return node
100
+ if isinstance(node, dict):
101
+ for v in node.values():
102
+ found = _dfs_find_models(v)
103
+ if found is not None:
104
+ return found
105
+ if isinstance(node, list):
106
+ for v in node:
107
+ found = _dfs_find_models(v)
108
+ if found is not None:
109
+ return found
110
+ return None
111
+
112
+ models_top = _dfs_find_models(api_top)
113
+ if models_top is None:
114
+ return None
115
 
116
+ # Helper to extract slugs from a section like "Text models"
117
+ def extract_model_slugs(section_title: str) -> list[str]:
118
+ result: list[str] = []
119
+ for sec in models_top.get("sections", []):
120
+ if isinstance(sec, dict) and sec.get("title") == section_title:
121
+ # Items may be nested under sections -> sections -> list of {local: model_doc/<slug>, title: ...}
122
+ nested = sec.get("sections") or []
123
+ for sub in nested:
124
+ if not isinstance(sub, dict):
125
+ continue
126
+ # Direct list:
127
+ if "local" in sub:
128
+ local = sub.get("local")
129
+ if isinstance(local, str) and local.startswith("model_doc/"):
130
+ result.append(local.split("/", 1)[1])
131
+ # Or deeper nesting
132
+ for leaf in sub.get("sections", []) if isinstance(sub.get("sections"), list) else []:
133
+ local = leaf.get("local")
134
+ if isinstance(local, str) and local.startswith("model_doc/"):
135
+ result.append(local.split("/", 1)[1])
136
+ return result
137
+
138
+ text_models = extract_model_slugs("Text models")
139
+ vision_models = extract_model_slugs("Vision models")
140
+ audio_models = extract_model_slugs("Audio models")
141
+ video_models = extract_model_slugs("Video models")
142
+ multimodal_models = extract_model_slugs("Multimodal models")
143
+ rl_models = extract_model_slugs("Reinforcement learning models")
144
+ ts_models = extract_model_slugs("Time series models")
145
+ graph_models = extract_model_slugs("Graph models")
146
+
147
+ # Basic validation: require at least some categories to be non-empty
148
+ if not any([text_models, vision_models, audio_models, video_models, multimodal_models]):
149
+ return None
150
+
151
+ # Preserve existing names and colors
152
+ return {
153
+ "text": {"name": "Text Models", "color": "#F59E0B", "models": text_models},
154
+ "vision": {"name": "Vision Models", "color": "#06B6D4", "models": vision_models},
155
+ "audio": {"name": "Audio Models", "color": "#8B5CF6", "models": audio_models},
156
+ "video": {"name": "Video Models", "color": "#EC4899", "models": video_models},
157
+ "multimodal": {"name": "Multimodal Models", "color": "#10B981", "models": multimodal_models},
158
+ "reinforcement": {"name": "Reinforcement Learning", "color": "#EF4444", "models": rl_models},
159
+ "timeseries": {"name": "Time Series Models", "color": "#F97316", "models": ts_models},
160
+ "graph": {"name": "Graph Models", "color": "#6B7280", "models": graph_models},
161
+ }
162
+
163
+ def get_model_modality(self, model_name: str) -> dict[str, str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  """Determine the modality category for a given model."""
165
  for modality_key, modality_info in self.modalities.items():
166
  if model_name in modality_info["models"]:
 
168
  # Default to text if not found (most common)
169
  return {"key": "text", "name": "Text Models", "color": "#F59E0B"}
170
 
171
+ def parse_release_date_from_file(self, file_path: str) -> Optional[dict[str, str]]:
172
  """Parse the release date line from a model documentation file."""
173
  try:
174
  with open(file_path, "r", encoding="utf-8") as f:
 
284
  print(f"Error extracting description: {e}")
285
  return "No description available."
286
 
287
+ def load_model_task_mappings(self) -> dict[str, list[str]]:
288
  """Load model-to-task mappings from transformers auto model mappings."""
289
  if self.tasks_cache:
290
  return self.tasks_cache
 
370
  print(f"❌ Error loading task mappings: {e}")
371
  return {}
372
 
373
+ def get_model_tasks(self, model_name: str) -> list[str]:
374
  """Get the list of tasks/pipelines supported by a model."""
375
  if not self.tasks_cache:
376
  self.load_model_task_mappings()
 
413
 
414
  return os.path.basename(file_path).replace(".md", "").replace("_", " ").replace("-", " ").title()
415
 
416
+ def parse_all_model_dates(self, force_refresh: bool = False) -> list[dict[str, str]]:
417
  """Parse release dates from all model documentation files."""
418
  if self.models_cache is not None and not force_refresh:
419
  return self.models_cache