| """ |
| Analyze model introductions in the Transformers repo over the last ~2 years and |
| classify each introduced model as "moe" vs "dense" using a heuristic regex. |
| |
| Outputs (in ./moe_dense_analysis): |
| - moe_dense_models_raw.csv : all models + inferred intro date + moe/dense label |
| - moe_dense_models_2y_window.csv : only models introduced in the last ~2 years |
| - moe_dense_2y_timeline.csv : monthly cumulative counts (moe/dense/total) over the window |
| - moe_dense_2y_timeline.png : plot of the cumulative MoE model count over the window |
| """ |
|
|
| |
| |
| |
| import calendar |
| import csv |
| import datetime as dt |
| import re |
| import subprocess |
| from pathlib import Path |
|
|
| import matplotlib |
| matplotlib.use("Agg") |
| import matplotlib.dates as mdates |
| import matplotlib.pyplot as plt |
|
|
|
|
| |
| |
| |
# Locate the checkout: the script expects the current working directory to be
# the root of a transformers clone.
repo = Path(".").resolve()

models_root = repo.joinpath("src", "transformers", "models")
if not models_root.exists():
    # Abort before creating any output when the models tree is missing.
    raise SystemExit("Run this from the transformers repo root.")

# All generated artifacts (CSVs and the plot) are written under this folder.
out_dir = repo.joinpath("moe_dense_analysis")
out_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
| |
| |
| |
# Analysis window: from the same calendar day two years ago through today.
today = dt.date.today()
end_date = today

# Leap years are four apart, so Feb 29 never exists two years earlier; that is
# the only day that needs clamping (to Feb 28) when stepping the year back.
window_start_day = 28 if (today.month, today.day) == (2, 29) else today.day
start_date = today.replace(year=today.year - 2, day=window_start_day)
|
|
|
|
| |
| |
| |
| |
| |
| |
# A "model" is any non-dunder directory under models_root that ships a
# modeling_<dirname>.py file; directories without one are skipped.
model_names = [
    entry.name
    for entry in sorted(models_root.iterdir())
    if entry.is_dir()
    and not entry.name.startswith("__")
    and (entry / f"modeling_{entry.name}.py").exists()
]

# Set view of the same names for O(1) membership tests while parsing git output.
model_name_set = set(model_names)
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
# Ask git for every commit that ADDED files under the models tree. Each commit
# is printed as a "DATE <YYYY-MM-DD>" header line followed by the added paths.
git_log_command = [
    "git",
    "log",
    "--diff-filter=A",
    "--name-only",
    "--format=DATE %ad",
    "--date=short",
    "--",
    "src/transformers/models",
]
git_log_result = subprocess.run(
    git_log_command,
    cwd=repo,
    check=True,  # a failing git command aborts the script
    text=True,
    capture_output=True,
)
git_out = git_log_result.stdout
|
|
# Map each known model directory to the earliest date on which any file was
# added beneath it. Dates are ISO "YYYY-MM-DD" strings, so plain string
# comparison orders them chronologically.
intro_dates = {}
current_date = None
models_prefix = "src/transformers/models/"

for log_line in git_out.splitlines():
    entry = log_line.strip()

    # Commit header: remember its date for the file paths that follow.
    if entry.startswith("DATE "):
        current_date = entry.split(" ", 1)[1]
        continue

    # Skip blank lines, paths seen before any header, and unrelated paths.
    if current_date is None or not entry.startswith(models_prefix):
        continue

    # First path component below the models root is the model directory name.
    added_model = entry[len(models_prefix):].split("/", 1)[0]
    if added_model not in model_name_set:
        continue

    # Keep the oldest date seen for this model.
    known_date = intro_dates.get(added_model)
    intro_dates[added_model] = (
        current_date if known_date is None else min(known_date, current_date)
    )
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
# Heuristic MoE detector: a top-level class (no leading indentation) whose name
# contains MoE/MOE/Moe/Expert/Experts and whose only base is nn.Module or
# torch.nn.Module. The captured group is the class name.
_moe_class_name_part = r"^class\s+([A-Za-z0-9_]*(?:MoE|MOE|Moe|Expert|Experts)[A-Za-z0-9_]*)"
_module_base_part = r"\s*\(\s*(?:nn|torch\.nn)\.Module\s*\)\s*:"

# MULTILINE lets "^" anchor at the start of every line of a modeling file.
moe_class_re = re.compile(_moe_class_name_part + _module_base_part, flags=re.MULTILINE)
|
|
# Build one record per model that has a known introduction date, labelling it
# "moe" when its modeling file defines at least one class matched by
# moe_class_re and "dense" otherwise.
unsorted_records = []
for name in model_names:
    first_added = intro_dates.get(name)
    if first_added is None:
        # No add-commit was found for this model; it cannot be dated.
        continue

    source_path = models_root / name / f"modeling_{name}.py"
    source_code = source_path.read_text(encoding="utf-8", errors="ignore")

    # De-duplicate and sort the matched class names for stable output.
    moe_classes = sorted(set(moe_class_re.findall(source_code)))

    unsorted_records.append(
        {
            "model": name,
            "introduced_date": first_added,
            "is_moe": "moe" if moe_classes else "dense",
            "moe_class_matches": ";".join(moe_classes),
            "modeling_file": str(source_path.relative_to(repo)),
        }
    )

# Chronological order, ties broken alphabetically by model name.
records = sorted(
    unsorted_records,
    key=lambda entry: (entry["introduced_date"], entry["model"]),
)
|
|
|
|
| |
| |
| |
# Keep only the models introduced inside [start_date, end_date]. Each kept row
# is a copy of the record plus an "intro_obj" key holding the parsed date.
window_records = []
for record in records:
    introduced_on = dt.datetime.strptime(record["introduced_date"], "%Y-%m-%d").date()
    if introduced_on < start_date or introduced_on > end_date:
        continue
    window_records.append({**record, "intro_obj": introduced_on})

# Order by real date, then model name.
window_records.sort(key=lambda entry: (entry["intro_obj"], entry["model"]))
|
|
|
|
| |
| |
| |
| |
| |
| |
# Build the timeline sample dates: start_date, then the same day-of-month in
# each following month (clamped to that month's length), up to end_date.
#
# Every sample is derived from start_date plus a month offset rather than from
# the previous sample. Stepping from the previous sample would let a clamp in a
# short month stick: starting on the 31st, February clamps to the 28th and all
# later samples would stay on the 28th instead of returning to the 30th/31st.
points = [start_date]
months_ahead = 1
while True:
    # Zero-based month index counted from January of start_date's year.
    month_index = start_date.month - 1 + months_ahead
    year = start_date.year + month_index // 12
    month = month_index % 12 + 1
    # monthrange()[1] is the number of days in that month.
    day = min(start_date.day, calendar.monthrange(year, month)[1])

    next_month = dt.date(year, month, day)
    if next_month > end_date:
        break
    points.append(next_month)
    months_ahead += 1

# Always close the series on end_date so the final totals are represented.
if points[-1] != end_date:
    points.append(end_date)
|
|
|
|
| |
| |
| |
# For each sample date, count how many in-window models of each label had
# already been introduced on or before that date (cumulative counts).
timeline_rows = []
for point in points:
    labels_so_far = [
        entry["is_moe"] for entry in window_records if entry["intro_obj"] <= point
    ]
    moe_cum = labels_so_far.count("moe")
    dense_cum = labels_so_far.count("dense")

    timeline_rows.append(
        {
            "date": point.isoformat(),
            "moe_cumulative": moe_cum,
            "dense_cumulative": dense_cum,
            "total_cumulative": moe_cum + dense_cum,
        }
    )
|
|
|
|
| |
| |
| |
# Dump every dated model (not just the in-window ones) to the raw CSV.
raw_csv = out_dir.joinpath("moe_dense_models_raw.csv")
raw_columns = ["model", "introduced_date", "is_moe", "moe_class_matches", "modeling_file"]
with raw_csv.open("w", newline="", encoding="utf-8") as raw_handle:
    raw_writer = csv.DictWriter(raw_handle, fieldnames=raw_columns)
    raw_writer.writeheader()
    for record in records:
        raw_writer.writerow(record)
|
|
# Dump the in-window models. The helper "intro_obj" key is not a CSV column,
# so it is dropped from each row before writing.
window_csv = out_dir.joinpath("moe_dense_models_2y_window.csv")
window_columns = ["model", "introduced_date", "is_moe", "moe_class_matches", "modeling_file"]
with window_csv.open("w", newline="", encoding="utf-8") as window_handle:
    window_writer = csv.DictWriter(window_handle, fieldnames=window_columns)
    window_writer.writeheader()
    window_writer.writerows(
        {key: value for key, value in entry.items() if key != "intro_obj"}
        for entry in window_records
    )
|
|
# Dump the cumulative timeline, one row per sample date.
timeline_csv = out_dir.joinpath("moe_dense_2y_timeline.csv")
timeline_columns = ["date", "moe_cumulative", "dense_cumulative", "total_cumulative"]
with timeline_csv.open("w", newline="", encoding="utf-8") as timeline_handle:
    timeline_writer = csv.DictWriter(timeline_handle, fieldnames=timeline_columns)
    timeline_writer.writeheader()
    for timeline_row in timeline_rows:
        timeline_writer.writerow(timeline_row)
|
|
|
|
| |
| |
| |
# Plot the cumulative MoE count over the window. Only the MoE series is drawn.
sample_dates = [dt.date.fromisoformat(entry["date"]) for entry in timeline_rows]
moe_counts = [entry["moe_cumulative"] for entry in timeline_rows]

fig, ax = plt.subplots(figsize=(11, 6))
ax.plot(sample_dates, moe_counts, label="MoE cumulative", linewidth=2.2)

ax.set_title(f"MoE model introductions ({start_date} to {end_date})")
ax.set_xlabel("Date")
ax.set_ylabel("Model count")
ax.grid(alpha=0.3)
ax.legend()

# One major tick every two months, labelled as YYYY-MM and slanted to fit.
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
plt.xticks(rotation=45, ha="right")
fig.tight_layout()

plot_png = out_dir / "moe_dense_2y_timeline.png"
fig.savefig(plot_png, dpi=180)
|
|
|
|
| |
| |
| |
# Console summary: window bounds, per-label totals, and the files written.
window_labels = [entry["is_moe"] for entry in window_records]
dense_total = window_labels.count("dense")
moe_total = window_labels.count("moe")

print(f"Window: {start_date} -> {end_date}")
print(f"Introduced in window: dense={dense_total}, moe={moe_total}, total={dense_total + moe_total}")
for written_path in (raw_csv, window_csv, timeline_csv, plot_png):
    print(f"Wrote {written_path}")
|
|