diff --git "a/data/prs.json" "b/data/prs.json" --- "a/data/prs.json" +++ "b/data/prs.json" @@ -1,24759 +1,24573 @@ [ { - "additions": 138, - "author": "SunMarc", + "additions": 1, + "author": "ydshieh", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? This PR updates the support for response api. I was mainly basing myself on chat completion api but there are minor differences with it e.g `input_image` vs `image_url` for `type` or `input_text` vs `text`, differen\u2026", - "changed_files": 5, + "body_excerpt": "# What does this PR do? Minor update", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/45463", - "created_at": "2026-04-15T15:56:45Z", - "deletions": 174, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/45484", + "created_at": "2026-04-16T20:10:45Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45463/files", - "html_url": "https://github.com/huggingface/transformers/pull/45463", + "files_url": "https://github.com/huggingface/transformers/pull/45484/files", + "html_url": "https://github.com/huggingface/transformers/pull/45484", "labels": [], - "merged": false, - "number": 45463, - "review_comments_count": 2, - "state": "open", - "title": "Fix response api support ", - "updated_at": "2026-04-15T20:42:24Z" + "merged": true, + "number": 45484, + "review_comments_count": 0, + "state": "closed", + "title": "Minor update", + "updated_at": "2026-04-16T20:27:48Z" }, { - "additions": 118, - "author": "tarekziade", + "additions": 36, + "author": "vasqu", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Activated some bandit rules and fixed a few spots", - "changed_files": 33, + "body_excerpt": "Fixup some conversion mappings --> everything should live directly under `mapping` except we add onto it - Solar change is the same as qwen2 moe - Cohere moved - Ernie moe similar to minimax with one additional rename", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 5, - "conversation_url": "https://github.com/huggingface/transformers/pull/45462", - "created_at": "2026-04-15T15:37:15Z", - "deletions": 67, + "conversation_url": "https://github.com/huggingface/transformers/pull/45483", + "created_at": "2026-04-16T15:59:26Z", + "deletions": 64, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45462/files", - "html_url": "https://github.com/huggingface/transformers/pull/45462", + "files_url": "https://github.com/huggingface/transformers/pull/45483/files", + "html_url": "https://github.com/huggingface/transformers/pull/45483", "labels": [], "merged": false, - "number": 45462, + "number": 45483, "review_comments_count": 0, "state": "open", - "title": "chore(sec): added a handful of security checks", - "updated_at": "2026-04-15T16:04:51Z" + "title": "[`Conversion Mapping`] Small fixups", + "updated_at": "2026-04-16T16:19:28Z" }, { - "additions": 0, - "author": "JiauZhang", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "# What does this PR do? Remove redundant condition checks in `get_image_size` method", - "changed_files": 1, + "additions": 184, + "author": "zucchini-nlp", + "author_association": "MEMBER", + "body_excerpt": ".", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/45461", - "created_at": "2026-04-15T14:00:11Z", - "deletions": 8, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/45481", + "created_at": "2026-04-16T15:51:21Z", + "deletions": 185, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45461/files", - "html_url": "https://github.com/huggingface/transformers/pull/45461", + "files_url": "https://github.com/huggingface/transformers/pull/45481/files", + "html_url": "https://github.com/huggingface/transformers/pull/45481", "labels": [], "merged": false, - "number": 45461, - "review_comments_count": 2, + "number": 45481, + "review_comments_count": 1, "state": "open", - "title": "Remove redundant condition checks in `get_image_size` method", - "updated_at": "2026-04-15T15:03:42Z" + "title": "Add check-auto in repo-consistency and fix sorting", + "updated_at": "2026-04-16T16:15:48Z" }, { - "additions": 5, - "author": "cloudyun888", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## Summary When protobuf is not installed, is a function call used as an expression. Because it is evaluated lazily when the try block raises, the resulting from the function itself bypasses the RuntimeError and OSError handlers below it,\u2026", - "changed_files": 1, + "additions": 27, + "author": "SunMarc", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? This PR fixes quantization tests. A few things were deprecated when compressed-tensors had their latest release, so i'm updating the tests. For fouroversix, it's just that the model was a bit too big for the CI", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/45460", - "created_at": "2026-04-15T13:18:36Z", - "deletions": 5, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/45480", + "created_at": "2026-04-16T15:23:30Z", + "deletions": 86, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45460/files", - "html_url": "https://github.com/huggingface/transformers/pull/45460", + "files_url": "https://github.com/huggingface/transformers/pull/45480/files", + "html_url": "https://github.com/huggingface/transformers/pull/45480", "labels": [], "merged": false, - "number": 45460, + "number": 45480, "review_comments_count": 0, "state": "open", - "title": "fix(tokenization): re-raise ImportError to allow RuntimeError/OSError fallback (#45459)", - "updated_at": "2026-04-15T13:18:36Z" + "title": "Update quants tests ", + "updated_at": "2026-04-16T15:34:33Z" }, { - "additions": 8, - "author": "tomaarsen", + "additions": 321, + "author": "zucchini-nlp", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Currently, for qwen2_5_omni and qwen3_omni_moe, you can only load the 'Talker' variant, i.e. with the audio output. This is a bit like only being able to load a checkpoint with `AutoModelForCausalLM` while `AutoMode\u2026", - "changed_files": 3, + "body_excerpt": "# What does this PR do? As per title, I think this pattern is used quite often and deserves to be a public mask-fn. Used currently in gemma/paligemma family, GIT, PI0 and will be used in two upcoming models (deepseekOcr and Molmo2) Imo it\u2026", + "changed_files": 9, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/45457", - "created_at": "2026-04-15T12:29:47Z", - "deletions": 2, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/45477", + "created_at": "2026-04-16T14:12:03Z", + "deletions": 593, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45457/files", - "html_url": "https://github.com/huggingface/transformers/pull/45457", + "files_url": "https://github.com/huggingface/transformers/pull/45477/files", + "html_url": "https://github.com/huggingface/transformers/pull/45477", "labels": [], "merged": false, - "number": 45457, - "review_comments_count": 4, + "number": 45477, + "review_comments_count": 0, "state": "open", - "title": "Allow loading Qwen Thinker 'base' models without generative head", - "updated_at": "2026-04-15T16:14:02Z" + "title": "[WIP] Add blockwise mask fn as opt arg for all masking functions", + "updated_at": "2026-04-16T14:42:38Z" }, { - "additions": 2, - "author": "tarekziade", + "additions": 14, + "author": "ydshieh", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? we're extending `ty` to more modules and we need stubs from more libs like openai.", - "changed_files": 2, + "body_excerpt": "# What does this PR do? Call CI workflow", + "changed_files": 4, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/45456", - "created_at": "2026-04-15T12:10:18Z", - "deletions": 2, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/45476", + "created_at": "2026-04-16T13:49:50Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45456/files", - "html_url": "https://github.com/huggingface/transformers/pull/45456", + "files_url": "https://github.com/huggingface/transformers/pull/45476/files", + "html_url": "https://github.com/huggingface/transformers/pull/45476", "labels": [], - "merged": true, - "number": 45456, + "merged": false, + "number": 45476, "review_comments_count": 0, - "state": "closed", - "title": "refactor(qa): extend extras so ty can run on server modules", - "updated_at": "2026-04-15T16:08:23Z" + "state": "open", + "title": "[Don't merge] Call CI workflow", + "updated_at": "2026-04-16T21:04:16Z" }, { - "additions": 3, - "author": "tomaarsen", + "additions": 28, + "author": "tarekziade", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? There's 2 changes, one is a definite fix and one is a preference. Some background: there are a lot of models that have finetuned `qwen2_5_omni`, e.g. https://huggingface.co/LCO-Embedding/LCO-Embedding-Omni-3B, and i\u2026", - "changed_files": 2, + "body_excerpt": "# What does this PR do? Split out the `mlinter` tool. We want to be able to: - use it from other CI projects - remove the ability to alter the linter from Transformers PRs This change should keep the exact same behavior than before", + "changed_files": 27, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/45455", - "created_at": "2026-04-15T11:38:05Z", - "deletions": 25, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/45475", + "created_at": "2026-04-16T12:01:49Z", + "deletions": 2661, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45455/files", - "html_url": "https://github.com/huggingface/transformers/pull/45455", + "files_url": "https://github.com/huggingface/transformers/pull/45475/files", + "html_url": "https://github.com/huggingface/transformers/pull/45475", "labels": [], "merged": false, - "number": 45455, - "review_comments_count": 5, + "number": 45475, + "review_comments_count": 0, "state": "open", - "title": "[`fix`] Make Qwen2_5OmniProcessor warning a lot less noisy via warning_once", - "updated_at": "2026-04-15T12:23:04Z" + "title": "chore(qa): split out mlinter", + "updated_at": "2026-04-16T17:09:22Z" }, { - "additions": 81, - "author": "zucchini-nlp", + "additions": 2, + "author": "rtrompier", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45200 As per title, this error was actually needed only in PG. Other models don't have such prefix/suffix separation when training", - "changed_files": 7, + "body_excerpt": "Bump the pinned doc-builder SHA so that main documentation builds also sync to the HF bucket (dual-write).", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 12, - "conversation_url": "https://github.com/huggingface/transformers/pull/45454", - "created_at": "2026-04-15T11:11:34Z", - "deletions": 104, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45454/files", - "html_url": "https://github.com/huggingface/transformers/pull/45454", + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/45474", + "created_at": "2026-04-16T11:59:17Z", + "deletions": 2, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/45474/files", + "html_url": "https://github.com/huggingface/transformers/pull/45474", "labels": [], "merged": false, - "number": 45454, + "number": 45474, "review_comments_count": 0, "state": "open", - "title": "Gemma4 training with text-only samples", - "updated_at": "2026-04-15T15:51:09Z" + "title": "chore: bump doc-builder SHA for main doc build workflow", + "updated_at": "2026-04-16T12:09:45Z" }, { - "additions": 466, - "author": "ArthurZucker", + "additions": 201, + "author": "AmineDiro", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Ai init", + "body_excerpt": "While benchmarking Qwen3-30B-A3B SFT training with Expert Parallelism (EP) using TRL, I found three bugs that combine to produce silently wrong results or NaN loss. Every existing test uses `tp_plan=\"auto\"` which bypasses `RouterParallel`\u2026", "changed_files": 4, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/45453", - "created_at": "2026-04-15T10:22:17Z", - "deletions": 50, - "draft": true, - "files_url": "https://github.com/huggingface/transformers/pull/45453/files", - "html_url": "https://github.com/huggingface/transformers/pull/45453", + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/45473", + "created_at": "2026-04-16T10:59:14Z", + "deletions": 41, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/45473/files", + "html_url": "https://github.com/huggingface/transformers/pull/45473", "labels": [], "merged": false, - "number": 45453, + "number": 45473, "review_comments_count": 0, "state": "open", - "title": "Draft commit", - "updated_at": "2026-04-15T11:40:23Z" + "title": "Fix EP: RouterParallel shape, tp_plan property, grouped_mm sentinels", + "updated_at": "2026-04-16T13:56:10Z" }, { - "additions": 3058, - "author": "DavidSolanas", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Fixes #45306 ## What and why All `models/X/__init__.py` files used `from .module import *` inside the `TYPE_CHECKING` block. This makes it impossible for static analysis tools (pyright, mypy, IDEs) to know which symbols are actually export\u2026", - "changed_files": 446, + "additions": 2, + "author": "kevinmalana", + "author_association": "NONE", + "body_excerpt": "## What does this PR do? Fixes a crash in `get_device_properties()` in `testing_utils.py` when CUDA is installed on the system but no GPU device is present (e.g., a CPU-only cloud studio with CUDA libraries installed). The function called\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/45452", - "created_at": "2026-04-15T08:44:00Z", - "deletions": 1305, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/45472", + "created_at": "2026-04-16T10:03:07Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45452/files", - "html_url": "https://github.com/huggingface/transformers/pull/45452", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/45472/files", + "html_url": "https://github.com/huggingface/transformers/pull/45472", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 45452, + "number": 45472, "review_comments_count": 0, - "state": "open", - "title": "refactor: replace wildcard imports with explicit imports in model __init__.py files", - "updated_at": "2026-04-15T12:15:24Z" + "state": "closed", + "title": "fix(testing_utils): guard get_device_capability with torch.cuda.is_available()", + "updated_at": "2026-04-16T10:57:34Z" }, { - "additions": 20, - "author": "MukundaKatta", - "author_association": "NONE", - "body_excerpt": "Docstring/comment-only typo fixes across Qwen3-VL, Qwen3.5, GLM4V, GLM4.6V, GLM-OCR and their MoE variants. `seperate` -> `separate`. No behavior changes. I deliberately left `image_seperate.weight` in `convert_mm_grounding_dino_to_hf.py`\u2026", - "changed_files": 10, + "additions": 3413, + "author": "nuxlear", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? Add EXAONE 4.5 architecture for the [EXAONE 4.5 model](https://huggingface.co/collections/LGAI-EXAONE/exaone-45) released by LG AI Research. This PR adds the modeling code for EXAONE 4.5, which uses the same LLM arc\u2026", + "changed_files": 16, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/45451", - "created_at": "2026-04-15T08:33:29Z", - "deletions": 20, + "conversation_url": "https://github.com/huggingface/transformers/pull/45471", + "created_at": "2026-04-16T08:52:35Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45451/files", - "html_url": "https://github.com/huggingface/transformers/pull/45451", + "files_url": "https://github.com/huggingface/transformers/pull/45471/files", + "html_url": "https://github.com/huggingface/transformers/pull/45471", "labels": [], "merged": false, - "number": 45451, - "review_comments_count": 0, - "state": "closed", - "title": "Fix 'seperate' typo in qwen3/glm video-model docstrings", - "updated_at": "2026-04-15T10:57:26Z" + "number": 45471, + "review_comments_count": 15, + "state": "open", + "title": "Add EXAONE 4.5 implementations", + "updated_at": "2026-04-16T14:21:42Z" }, { - "additions": 1, - "author": "rtrompier", - "author_association": "MEMBER", - "body_excerpt": "Switch the PR doc upload flow from the legacy dataset push to the new HF bucket.", + "additions": 7, + "author": "kaixuanliu", + "author_association": "CONTRIBUTOR", + "body_excerpt": "@ydshieh pls help review, thx!", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/45450", - "created_at": "2026-04-15T08:26:45Z", - "deletions": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/45470", + "created_at": "2026-04-16T07:44:20Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45450/files", - "html_url": "https://github.com/huggingface/transformers/pull/45450", + "files_url": "https://github.com/huggingface/transformers/pull/45470/files", + "html_url": "https://github.com/huggingface/transformers/pull/45470", "labels": [], "merged": false, - "number": 45450, + "number": 45470, "review_comments_count": 0, "state": "open", - "title": "chore: bump doc-builder SHA for PR upload workflow", - "updated_at": "2026-04-15T10:13:31Z" + "title": "skip test_flash_attn_2_can_dispatch_composite_models tests for", + "updated_at": "2026-04-16T07:45:33Z" }, { - "additions": 1, - "author": "hmellor", - "author_association": "MEMBER", - "body_excerpt": "This model also has the wrong tokenizer class in its config", - "changed_files": 1, + "additions": 12, + "author": "Spectual", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "# What does this PR do? `PixioPatchEmbeddings.forward` already accepted an `interpolate_pos_encoding` flag (inherited from `ViTPatchEmbeddings`) to skip image-size validation and allow variable-resolution inputs. However, neither `PixioEmb\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/45449", - "created_at": "2026-04-15T08:26:05Z", - "deletions": 0, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/45469", + "created_at": "2026-04-16T06:54:56Z", + "deletions": 10, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45449/files", - "html_url": "https://github.com/huggingface/transformers/pull/45449", + "files_url": "https://github.com/huggingface/transformers/pull/45469/files", + "html_url": "https://github.com/huggingface/transformers/pull/45469", "labels": [], - "merged": true, - "number": 45449, + "merged": false, + "number": 45469, "review_comments_count": 0, - "state": "closed", - "title": "Add `step3_vl` to `MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS`", - "updated_at": "2026-04-15T09:28:46Z" + "state": "open", + "title": "Fix: propagate interpolate_pos_encoding through Pixio model hierarchy", + "updated_at": "2026-04-16T06:56:04Z" }, { - "additions": 178, - "author": "Cyrilvallez", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do?", - "changed_files": 3, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/45448", - "created_at": "2026-04-15T08:06:53Z", - "deletions": 54, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45448/files", - "html_url": "https://github.com/huggingface/transformers/pull/45448", - "labels": [], - "merged": false, - "number": 45448, - "review_comments_count": 0, - "state": "open", - "title": "Fix clips", - "updated_at": "2026-04-15T14:29:41Z" - }, - { - "additions": 1, - "author": "ZSLsherly", - "author_association": "FIRST_TIMER", - "body_excerpt": "This commit corrects the PyTorch version check for importing `AuxRequest` from `torch.nn.attention.flex_attention`(line51). The `AuxRequest` class was actually introduced in PyTorch 2.9.1, not 2.9.0. The current code attempts to import it\u2026", + "additions": 17, + "author": "Jah-yee", + "author_association": "NONE", + "body_excerpt": "Good day, ## Problem On Apple Silicon (MPS backend), `torch.nn.functional.scaled_dot_product_attention` produces incorrect output when the value tensor's head dimension differs from the query tensor's head dimension. This affects DeepSeek\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 8, - "conversation_url": "https://github.com/huggingface/transformers/pull/45445", - "created_at": "2026-04-15T03:09:38Z", + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/45467", + "created_at": "2026-04-16T06:44:51Z", "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45445/files", - "html_url": "https://github.com/huggingface/transformers/pull/45445", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/45467/files", + "html_url": "https://github.com/huggingface/transformers/pull/45467", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 45445, - "review_comments_count": 1, + "number": 45467, + "review_comments_count": 0, "state": "closed", - "title": "Update Torch version check for flex attention", - "updated_at": "2026-04-15T11:40:34Z" + "title": "Fix MPS SDPA output shape when value head dim differs from query head dim", + "updated_at": "2026-04-16T10:53:42Z" }, { - "additions": 50, - "author": "tomaarsen", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Resolves https://github.com/huggingface/sentence-transformers/issues/3724 ## Code Agent Policy - [x] I confirm that this is not a pure code agent PR. ## Before submitting - [ ] This PR fixes a typo or improves the d\u2026", - "changed_files": 2, + "additions": 3, + "author": "Jah-yee", + "author_association": "NONE", + "body_excerpt": "Fixes #45459 - Previously import_protobuf_decode_error() raised ImportError when protobuf wasn't installed even for other exceptions, hiding the real error. Now returns empty tuple () so the actual exception propagates.", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 6, - "conversation_url": "https://github.com/huggingface/transformers/pull/45444", - "created_at": "2026-04-14T19:28:34Z", - "deletions": 20, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/45466", + "created_at": "2026-04-16T00:55:40Z", + "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45444/files", - "html_url": "https://github.com/huggingface/transformers/pull/45444", + "files_url": "https://github.com/huggingface/transformers/pull/45466/files", + "html_url": "https://github.com/huggingface/transformers/pull/45466", "labels": [], "merged": false, - "number": 45444, - "review_comments_count": 4, - "state": "open", - "title": "[`fix`] Always early return for non-Mistral models in _patch_mistral_regex", - "updated_at": "2026-04-15T12:03:55Z" + "number": 45466, + "review_comments_count": 0, + "state": "closed", + "title": "fix: return empty tuple when protobuf not available", + "updated_at": "2026-04-16T03:16:39Z" }, { - "additions": 38, - "author": "qgallouedec", + "additions": 65, + "author": "stevhliu", "author_association": "MEMBER", - "body_excerpt": "When `transformers serve` is launched with a positional model argument, the server silently overwrites the `\"model\"` field in every incoming request with the pinned model id. This is surprising: a client that asks for model B receives a re\u2026", - "changed_files": 2, + "body_excerpt": "refactors the \"Contribute to Transformers\" doc to be more lightweight and an easy entry point that links out to more dedicated guides", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/45443", - "created_at": "2026-04-14T19:14:10Z", - "deletions": 0, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45443/files", - "html_url": "https://github.com/huggingface/transformers/pull/45443", + "conversation_url": "https://github.com/huggingface/transformers/pull/45465", + "created_at": "2026-04-15T23:12:11Z", + "deletions": 469, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/45465/files", + "html_url": "https://github.com/huggingface/transformers/pull/45465", "labels": [], "merged": false, - "number": 45443, + "number": 45465, "review_comments_count": 0, "state": "open", - "title": "Raise 400 on model mismatch when `transformers serve` is pinned", - "updated_at": "2026-04-15T11:42:44Z" + "title": "[docs] contributing", + "updated_at": "2026-04-15T23:22:12Z" }, { - "additions": 4, - "author": "paulinebm", + "additions": 186, + "author": "SunMarc", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Fixes # (issue) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by c\u2026", - "changed_files": 8, + "body_excerpt": "# What does this PR do? Fixes # (issue) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by c\u2026", + "changed_files": 8, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/45294", - "created_at": "2026-04-07T17:51:38Z", - "deletions": 4, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45294/files", - "html_url": "https://github.com/huggingface/transformers/pull/45294", + "conversation_url": "https://github.com/huggingface/transformers/pull/45350", + "created_at": "2026-04-09T17:46:37Z", + "deletions": 0, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/45350/files", + "html_url": "https://github.com/huggingface/transformers/pull/45350", "labels": [], "merged": false, - "number": 45294, + "number": 45350, "review_comments_count": 0, "state": "open", - "title": "feat: add Gemma4ForSequenceClassification", - "updated_at": "2026-04-13T13:05:41Z" + "title": "WIP: Add support for Granite4VisionForConditionalGeneration", + "updated_at": "2026-04-10T12:34:50Z" }, { - "additions": 4, - "author": "yonigozlan", - "author_association": "MEMBER", - "body_excerpt": "## Fix global state leak in `AutoTokenizer.register` causing test failures ### Problem `test_from_pretrained_dynamic_processor` was failing when run as part of the full test class with: ``` AttributeError: NewTokenizer has no attribute spe\u2026", + "additions": 90, + "author": "florian6973", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? Fixes #45305 Add a regression test in `TrainerGradientAccumulationTest` to avoid passing the GAS value to Accelerate by mistake Description: I force the value of the `num_steps` parameter to be 1, and the regression\u2026", "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/45293", - "created_at": "2026-04-07T16:29:25Z", - "deletions": 16, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/45349", + "created_at": "2026-04-09T17:24:39Z", + "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45293/files", - "html_url": "https://github.com/huggingface/transformers/pull/45293", - "labels": [], - "merged": false, - "number": 45293, - "review_comments_count": 1, - "state": "open", - "title": "Fix \"AttributeError: NewTokenizer has no attribute special_attribute_present\" (Remove `REGISTERED_FAST_ALIASES`)", - "updated_at": "2026-04-09T12:04:21Z" + "files_url": "https://github.com/huggingface/transformers/pull/45349/files", + "html_url": "https://github.com/huggingface/transformers/pull/45349", + "labels": [ + "for patch" + ], + "merged": true, + "number": 45349, + "review_comments_count": 6, + "state": "closed", + "title": "Fix #45305 + add regression test GAS", + "updated_at": "2026-04-13T14:41:43Z" }, { - "additions": 2958, - "author": "DatLe203", - "author_association": "NONE", - "body_excerpt": "# What does this PR do? save locally --> local locally) ```\u2026", - "changed_files": 2, + "additions": 307, + "author": "BillionClaw", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "AMD Strix Halo APUs (e.g., Radeon 8060S) have issues with mmap-based tensor loading from safetensors, causing out-of-memory errors even when sufficient memory is available. This fix: - Adds `is_strix_halo()` helper to detect Strix Halo GPU\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44730", - "created_at": "2026-03-15T20:44:32Z", - "deletions": 4, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44785", + "created_at": "2026-03-17T06:55:31Z", + "deletions": 83, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44730/files", - "html_url": "https://github.com/huggingface/transformers/pull/44730", + "files_url": "https://github.com/huggingface/transformers/pull/44785/files", + "html_url": "https://github.com/huggingface/transformers/pull/44785", "labels": [], - "merged": true, - "number": 44730, - "review_comments_count": 6, + "merged": false, + "number": 44785, + "review_comments_count": 0, "state": "closed", - "title": "Fix `mlcd` auto config/model/mapping issues", - "updated_at": "2026-03-16T12:12:30Z" + "title": "fix(model_loading): Disable mmap on Strix Halo to avoid OOM", + "updated_at": "2026-03-17T10:28:06Z" }, { - "additions": 214, - "author": "xenova", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? This PR introduces a helper utility function, `int_div_ceil`, which performs `math.ceil(a / b)` for non-negative integer operands. This is necessary as the current approach is both error-prone and imprecise (especia\u2026", - "changed_files": 58, + "additions": 2, + "author": "BillionClaw", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "This PR fixes the DeepSeek tokenizer issue where spaces were lost during decoding in Transformers v5. ## Problem DeepSeek V2 and V3 models use SentencePiece tokenization (like Llama) but were falling back to the generic TokenizersBackend i\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44729", - "created_at": "2026-03-15T20:29:38Z", - "deletions": 225, + "conversation_url": "https://github.com/huggingface/transformers/pull/44783", + "created_at": "2026-03-17T05:58:54Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44729/files", - "html_url": "https://github.com/huggingface/transformers/pull/44729", + "files_url": "https://github.com/huggingface/transformers/pull/44783/files", + "html_url": "https://github.com/huggingface/transformers/pull/44783", "labels": [], "merged": false, - "number": 44729, + "number": 44783, "review_comments_count": 0, - "state": "open", - "title": "Avoid floating point math for ceil operations", - "updated_at": "2026-03-15T20:49:34Z" + "state": "closed", + "title": "fix(auto): Map deepseek_v2 and deepseek_v3 to LlamaTokenizer", + "updated_at": "2026-04-16T10:47:47Z" }, { - "additions": 88, - "author": "ajmeese7", - "author_association": "NONE", - "body_excerpt": "# What does this PR do? Fixes a GPU memory leak in `Bnb4bitQuantize.convert()` where float16 source tensors are never freed during 4-bit quantized model loading via `from_pretrained`, causing OOM on models whose float16 size exceeds GPU VR\u2026", - "changed_files": 2, + "additions": 6, + "author": "JiwaniZakir", + "author_association": "CONTRIBUTOR", + "body_excerpt": "Fixes #44737 `XLNetModel.relative_positional_encoding` was creating all `torch.arange` tensors on CPU by default, then calling `.to(output_h.device)` at the call site to move them. Adds a `device` parameter to `relative_positional_encoding\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44728", - "created_at": "2026-03-15T19:56:44Z", - "deletions": 1, + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44782", + "created_at": "2026-03-17T05:11:36Z", + "deletions": 7, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44728/files", - "html_url": "https://github.com/huggingface/transformers/pull/44728", + "files_url": "https://github.com/huggingface/transformers/pull/44782/files", + "html_url": "https://github.com/huggingface/transformers/pull/44782", "labels": [], - "merged": false, - "number": 44728, + "merged": true, + "number": 44782, "review_comments_count": 0, "state": "closed", - "title": "Fix float16 memory leak during 4-bit quantized model loading", - "updated_at": "2026-03-16T20:53:54Z" + "title": "fix: XLNet: relative_positional_encoding computes on CPU every forward", + "updated_at": "2026-03-19T13:30:48Z" }, { - "additions": 202, - "author": "LincolnBurrows2017", + "additions": 5, + "author": "bensons", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Fixed issue where kwargs like force_download, proxies, token were not being passed to cached_file function.", - "changed_files": 11, + "body_excerpt": "# What does this PR do? Some model repos provide `extra_special_tokens` as a list in their tokenizer_config.json, which caused an `AttributeError: 'list' object has no attribute 'keys'`. This converts list inputs to a dict mapping each tok\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44727", - "created_at": "2026-03-15T19:41:24Z", - "deletions": 33, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44781", + "created_at": "2026-03-17T04:59:02Z", + "deletions": 2849, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44727/files", - "html_url": "https://github.com/huggingface/transformers/pull/44727", - "labels": [ - "Code agent slop" - ], + "files_url": "https://github.com/huggingface/transformers/pull/44781/files", + "html_url": "https://github.com/huggingface/transformers/pull/44781", + "labels": [], "merged": false, - "number": 44727, + "number": 44781, "review_comments_count": 0, - "state": "closed", - "title": "fix: AutoProcessor.from_pretrained not passing kwargs to cached_file", - "updated_at": "2026-03-18T13:15:46Z" + "state": "open", + "title": "Fix `_set_model_specific_special_tokens` to accept list-format `extra_special_tokens`", + "updated_at": "2026-03-27T23:19:21Z" }, { - "additions": 198, + "additions": 145, "author": "LincolnBurrows2017", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Replaced bare except clause with except Exception in _safe_convert_tensor function to follow Python best practices (PEP 8).", - "changed_files": 10, + "body_excerpt": "Fixed logic error in is_tiktoken_available function. The original code `return with_blobfile and _is_package_available(\"blobfile\")[0] or True` would always return True due to operator precedence.", + "changed_files": 8, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44725", - "created_at": "2026-03-15T17:41:18Z", - "deletions": 29, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44778", + "created_at": "2026-03-16T23:41:29Z", + "deletions": 28, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44725/files", - "html_url": "https://github.com/huggingface/transformers/pull/44725", + "files_url": "https://github.com/huggingface/transformers/pull/44778/files", + "html_url": "https://github.com/huggingface/transformers/pull/44778", "labels": [ "Code agent slop" ], "merged": false, - "number": 44725, + "number": 44778, "review_comments_count": 0, "state": "closed", - "title": "fix: replace bare except with Exception in Fuyu image processing", - "updated_at": "2026-03-18T13:16:22Z" + "title": "fix: correct logic error in is_tiktoken_available function", + "updated_at": "2026-03-18T13:15:37Z" }, { - "additions": 6, - "author": "ydshieh", + "additions": 35, + "author": "stevhliu", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? TO be explained.", - "changed_files": 5, + "body_excerpt": "adds docs for #43705 (enable bidirectional attention for decoder-only models)", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44724", - "created_at": "2026-03-15T17:14:12Z", - "deletions": 5, - "draft": true, - "files_url": "https://github.com/huggingface/transformers/pull/44724/files", - "html_url": "https://github.com/huggingface/transformers/pull/44724", + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44777", + "created_at": "2026-03-16T21:58:40Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44777/files", + "html_url": "https://github.com/huggingface/transformers/pull/44777", "labels": [], - "merged": false, - "number": 44724, + "merged": true, + "number": 44777, "review_comments_count": 1, - "state": "open", - "title": "Fix some missing / incorrect entries in auto files", - "updated_at": "2026-03-16T09:59:56Z" + "state": "closed", + "title": "[docs] is_causal feature", + "updated_at": "2026-03-17T19:50:43Z" }, { - "additions": 12, - "author": "aashirpersonal", - "author_association": "NONE", - "body_excerpt": "## Summary This PR fixes #44716 by exposing and forwarding `interpolate_pos_encoding` through the Pixio embedding/model call chain so the option is actually usable from `PixioModel.forward()`. ### Changes - Added `interpolate_pos_encoding:\u2026", - "changed_files": 2, + "additions": 0, + "author": "stevhliu", + "author_association": "MEMBER", + "body_excerpt": "the doc-builder is breaking because it can't find `Mistral4ForQuestionAnswering`, which looks like it doesn't exist", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44723", - "created_at": "2026-03-15T16:52:03Z", - "deletions": 6, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44723/files", - "html_url": "https://github.com/huggingface/transformers/pull/44723", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44723, - "review_comments_count": 0, - "state": "closed", - "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel", - "updated_at": "2026-03-18T15:05:52Z" - }, - { - "additions": 38, - "author": "chandan11248", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR do? Migrates the GPT-J model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as described in #43979. ### Changes - Added `_can_record_outputs` to `GPTJPreTrained\u2026", - "changed_files": 2, - "cluster_id": "cluster-43979-11", - "cluster_ids": [ - "cluster-43979-11" - ], - "cluster_role": "member", - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44722", - "created_at": "2026-03-15T15:33:25Z", - "deletions": 110, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44776", + "created_at": "2026-03-16T20:43:33Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44722/files", - "html_url": "https://github.com/huggingface/transformers/pull/44722", + "files_url": "https://github.com/huggingface/transformers/pull/44776/files", + "html_url": "https://github.com/huggingface/transformers/pull/44776", "labels": [], - "merged": false, - "number": 44722, + "merged": true, + "number": 44776, "review_comments_count": 0, - "state": "open", - "title": "Refactor gptj output tracing to use standardized decorators", - "updated_at": "2026-03-19T18:12:59Z" - }, - { - "additions": 4, - "author": "rsmed31", - "author_association": "NONE", - "body_excerpt": "## Summary Fixes #44716 `PixioPatchEmbeddings.forward` already accepted `interpolate_pos_encoding` but it was silently dropped \u2014 never passed from `PixioEmbeddings.forward` or `PixioModel.forward`, making the parameter effectively unusable\u2026", - "changed_files": 1, + "state": "closed", + "title": "[fix] mistral 4 docs", + "updated_at": "2026-03-16T21:11:29Z" + }, + { + "additions": 177, + "author": "stevhliu", + "author_association": "MEMBER", + "body_excerpt": "refactors the current [Parallelism methods](https://huggingface.co/docs/transformers/main/en/perf_train_gpu_many#zero-data-parallelism-pipeline-parallelism-and-model-parallelism-3d-parallelism) doc to: - focus on practical examples of comb\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44718", - "created_at": "2026-03-14T23:57:14Z", - "deletions": 3, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44718/files", - "html_url": "https://github.com/huggingface/transformers/pull/44718", + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44775", + "created_at": "2026-03-16T20:23:29Z", + "deletions": 109, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/44775/files", + "html_url": "https://github.com/huggingface/transformers/pull/44775", "labels": [], "merged": false, - "number": 44718, + "number": 44775, "review_comments_count": 0, - "state": "closed", - "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel", - "updated_at": "2026-03-15T17:58:58Z" + "state": "open", + "title": "[docs] n-d parallelism", + "updated_at": "2026-03-16T20:28:48Z" }, { - "additions": 15, + "additions": 0, "author": "ydshieh", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? As discussed internally, some component model classes didn't specify the correct config classes. This PR fixes them (those I could found - because the tiny model creation script fails due to those mistakes).", - "changed_files": 7, + "body_excerpt": "# What does this PR do? Remove `is_causal` from `EuroBertConfig`", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44715", - "created_at": "2026-03-14T21:11:52Z", - "deletions": 2, + "comments_count": 5, + "conversation_url": "https://github.com/huggingface/transformers/pull/44774", + "created_at": "2026-03-16T18:56:19Z", + "deletions": 6, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44715/files", - "html_url": "https://github.com/huggingface/transformers/pull/44715", + "files_url": "https://github.com/huggingface/transformers/pull/44774/files", + "html_url": "https://github.com/huggingface/transformers/pull/44774", "labels": [], "merged": true, - "number": 44715, + "number": 44774, "review_comments_count": 0, "state": "closed", - "title": "Fix missing / incorrect `config` class in some model class definitions", - "updated_at": "2026-03-15T11:19:51Z" + "title": "Remove `is_causal` from `EuroBertConfig`", + "updated_at": "2026-03-17T09:33:21Z" }, { - "additions": 181, - "author": "LincolnBurrows2017", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating from core config to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but `text_config` still has default\u2026", - "changed_files": 8, + "additions": 3, + "author": "githubnemo", + "author_association": "MEMBER", + "body_excerpt": "The links to the quantization offloading were outdated and 4-bit quantization also supports offloading which should be mentioned. cc @SunMarc", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44714", - "created_at": "2026-03-14T20:42:46Z", - "deletions": 26, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/44772", + "created_at": "2026-03-16T18:46:13Z", + "deletions": 3, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44714/files", - "html_url": "https://github.com/huggingface/transformers/pull/44714", + "files_url": "https://github.com/huggingface/transformers/pull/44772/files", + "html_url": "https://github.com/huggingface/transformers/pull/44772", "labels": [], "merged": false, - "number": 44714, + "number": 44772, "review_comments_count": 0, - "state": "closed", - "title": "fix: propagate num_labels to text_config for Qwen models", - "updated_at": "2026-03-18T12:56:27Z" + "state": "open", + "title": "bitsandbytes: Update links and docs", + "updated_at": "2026-03-17T15:57:56Z" }, { - "additions": 15, - "author": "kulkarni-rohan", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Applies the output tracing refactor to ColQwen2ForRetrieval as part of the broader effort tracked in issue #43979 to modernize output handling across all models in the library. Changes in both modular_colqwen2.py and modeling_colqwen2.py:\u2026", - "changed_files": 2, + "additions": 2, + "author": "ydshieh", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? wtf", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44713", - "created_at": "2026-03-14T20:20:14Z", - "deletions": 28, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44713/files", - "html_url": "https://github.com/huggingface/transformers/pull/44713", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44771", + "created_at": "2026-03-16T18:45:11Z", + "deletions": 1, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/44771/files", + "html_url": "https://github.com/huggingface/transformers/pull/44771", "labels": [], "merged": false, - "number": 44713, + "number": 44771, "review_comments_count": 0, "state": "open", - "title": "[ColQwen2] Refactor output tracing (issue #43979)", - "updated_at": "2026-03-14T20:21:24Z" + "title": "wtf", + "updated_at": "2026-03-16T18:56:00Z" }, { - "additions": 2, - "author": "ydshieh", + "additions": 203, + "author": "zucchini-nlp", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? torch 2.11 is going to be released soon, but we still use 2.9. Let's update it to 2.10 so at least a run with torch 2.10, before we update to torch 2.11 later.", - "changed_files": 1, + "body_excerpt": "# What does this PR do? Fix tests failing because of `strict` type validation and decorate two missing configs, Nemotron and VibeVoice", + "changed_files": 12, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44712", - "created_at": "2026-03-14T20:18:01Z", - "deletions": 2, + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44770", + "created_at": "2026-03-16T18:44:03Z", + "deletions": 268, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44712/files", - "html_url": "https://github.com/huggingface/transformers/pull/44712", + "files_url": "https://github.com/huggingface/transformers/pull/44770/files", + "html_url": "https://github.com/huggingface/transformers/pull/44770", "labels": [], "merged": true, - "number": 44712, - "review_comments_count": 0, + "number": 44770, + "review_comments_count": 1, "state": "closed", - "title": "Update Nvidia CI docker file to use torch 2.10", - "updated_at": "2026-03-14T20:29:04Z" + "title": "Fix configs with `@strict`", + "updated_at": "2026-03-17T15:39:43Z" }, { - "additions": 339, - "author": "anuq", - "author_association": "NONE", - "body_excerpt": "## What does this PR do? Fixes #35141. When `tie_word_embeddings=False`, calling `resize_token_embeddings()` creates a new `nn.Linear` for the LM head via `_get_resized_lm_head()`. The new module's weight and bias tensors do **not** carry\u2026", - "changed_files": 4, + "additions": 145, + "author": "LincolnBurrows2017", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## Summary The `is_batched_video()` and `convert_pil_frames_to_video()` functions in `src/transformers/video_utils.py` were accessing `videos[0]` without first checking if the list is empty, causing `IndexError` when empty lists are passed\u2026", + "changed_files": 8, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44711", - "created_at": "2026-03-14T19:21:21Z", - "deletions": 205, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44769", + "created_at": "2026-03-16T18:40:07Z", + "deletions": 28, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44711/files", - "html_url": "https://github.com/huggingface/transformers/pull/44711", + "files_url": "https://github.com/huggingface/transformers/pull/44769/files", + "html_url": "https://github.com/huggingface/transformers/pull/44769", "labels": [ "Code agent slop" ], "merged": false, - "number": 44711, + "number": 44769, "review_comments_count": 0, "state": "closed", - "title": "fix: mark new lm_head params as `_is_hf_initialized` after `resize_token_embeddings`", - "updated_at": "2026-03-20T13:36:58Z" + "title": "Fix: Handle empty lists in video_utils functions", + "updated_at": "2026-03-18T13:15:55Z" }, { - "additions": 12, - "author": "he-yufeng", - "author_association": "CONTRIBUTOR", - "body_excerpt": "## What does this PR do? Fixes `AutoProcessor.from_pretrained` silently dropping hub kwargs like `force_download`, `cache_dir`, `token`, `revision`, etc. ### The bug The existing code on line ~300 filters kwargs using `inspect.signature(ca\u2026", + "additions": 20, + "author": "michaelbenayoun", + "author_association": "MEMBER", + "body_excerpt": "The function `add_tensor_parallel_hooks_to_module` has unused parameters, in this PR we: - Remove `tp_plan`, which is not used. - Remove `parameter_name` which is not used - Remove `layer_name`. This parameter is only used for logging purp\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44710", - "created_at": "2026-03-14T18:33:53Z", - "deletions": 2, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44768", + "created_at": "2026-03-16T18:29:52Z", + "deletions": 9, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44710/files", - "html_url": "https://github.com/huggingface/transformers/pull/44710", + "files_url": "https://github.com/huggingface/transformers/pull/44768/files", + "html_url": "https://github.com/huggingface/transformers/pull/44768", "labels": [], "merged": true, - "number": 44710, - "review_comments_count": 0, + "number": 44768, + "review_comments_count": 3, "state": "closed", - "title": "Fix AutoProcessor.from_pretrained silently dropping hub kwargs", - "updated_at": "2026-03-25T18:13:14Z" + "title": "Remove unused parameters and improve add_tensor_parallel_hooks_t\u2026", + "updated_at": "2026-04-09T17:11:55Z" }, { - "additions": 6778, - "author": "LucasMa2025", - "author_association": "FIRST_TIMER", - "body_excerpt": "# \ud83c\udf9b\ufe0f Add Configurable Generation Scheduler and State Machine for `generate()` ## Summary This PR introduces a **fully optional, zero-intrusion** Generation Scheduler (`GenerationScheduler`) and explicit state machine (`GenerationStateMachi\u2026", - "changed_files": 15, + "additions": 11, + "author": "tarekziade", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? EuroBertConfig was missing `@strict(accept_kwargs=True)` unlike its parent LlamaConfig, causing failures when reloading saved configs that include extra keys like `architectures`. Also fixed the test helper passing\u2026", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/44708", - "created_at": "2026-03-14T17:13:34Z", - "deletions": 7, - "draft": true, - "files_url": "https://github.com/huggingface/transformers/pull/44708/files", - "html_url": "https://github.com/huggingface/transformers/pull/44708", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44767", + "created_at": "2026-03-16T17:31:26Z", + "deletions": 5, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44767/files", + "html_url": "https://github.com/huggingface/transformers/pull/44767", "labels": [], - "merged": false, - "number": 44708, - "review_comments_count": 0, + "merged": true, + "number": 44767, + "review_comments_count": 6, "state": "closed", - "title": "Add Configurable Generation Scheduler and State Machine for `generate()`", - "updated_at": "2026-03-14T19:19:11Z" + "title": "Fix: Eurobert model was missing @strict decorator and invalid test kwargs", + "updated_at": "2026-03-16T19:02:31Z" }, { - "additions": 3, - "author": "saivedant169", - "author_association": "NONE", - "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `MptForCausalLM.forward()` and `MptModel.forward()`, bringing MPT in line with other CausalLM models. Same rationale as the Bloom PR (#44706) \u2014 M\u2026", - "changed_files": 1, + "additions": 26, + "author": "itazap", + "author_association": "MEMBER", + "body_excerpt": "for when remote code tries to import from `tokenization_xxx_fast`", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], - "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44707", - "created_at": "2026-03-14T17:12:16Z", - "deletions": 0, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44707/files", - "html_url": "https://github.com/huggingface/transformers/pull/44707", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44707, + "cluster_role": null, + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44766", + "created_at": "2026-03-16T17:30:23Z", + "deletions": 1, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44766/files", + "html_url": "https://github.com/huggingface/transformers/pull/44766", + "labels": [], + "merged": true, + "number": 44766, "review_comments_count": 0, "state": "closed", - "title": "Add position_ids to MptForCausalLM forward pass", - "updated_at": "2026-03-18T13:39:36Z" + "title": "support xxxFast alias in v5 tokenizers", + "updated_at": "2026-03-18T13:40:05Z" }, { - "additions": 3, - "author": "saivedant169", - "author_association": "NONE", - "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `BloomForCausalLM.forward()` and `BloomModel.forward()`, bringing Bloom in line with other CausalLM models like Llama, Falcon, Gemma, and Mistral\u2026", - "changed_files": 1, + "additions": 19, + "author": "harshaljanjani", + "author_association": "CONTRIBUTOR", + "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **PaliGemma 2:** The [PaliGemma 1 test class](https://github.com/huggingface/transformers/blob/main/tests/models/paligemma/test_modeling_paligemm\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44706", - "created_at": "2026-03-14T17:09:11Z", + "comments_count": 5, + "conversation_url": "https://github.com/huggingface/transformers/pull/44765", + "created_at": "2026-03-16T17:26:22Z", "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44706/files", - "html_url": "https://github.com/huggingface/transformers/pull/44706", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44706, + "files_url": "https://github.com/huggingface/transformers/pull/44765/files", + "html_url": "https://github.com/huggingface/transformers/pull/44765", + "labels": [], + "merged": true, + "number": 44765, "review_comments_count": 0, "state": "closed", - "title": "Add position_ids to BloomForCausalLM forward pass", - "updated_at": "2026-03-18T13:39:51Z" + "title": "fix(testing): Fix PaliGemma 2 and PaddleOCR-VL test failures on main", + "updated_at": "2026-03-20T13:55:55Z" }, { - "additions": 14, - "author": "saivedant169", - "author_association": "NONE", - "body_excerpt": "Fixes part of #32937 ## What does this PR do? RoFormer introduced rotary position embeddings, but its `ForCausalLM` forward method doesn't accept `position_ids` \u2014 which means callers can't specify custom positions for packed sequences or f\u2026", - "changed_files": 1, + "additions": 12, + "author": "tarekziade", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? Fixes the siglip import. that was also crashing the test fetcher", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44705", - "created_at": "2026-03-14T16:48:06Z", - "deletions": 1, + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44764", + "created_at": "2026-03-16T17:15:40Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44705/files", - "html_url": "https://github.com/huggingface/transformers/pull/44705", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44705, - "review_comments_count": 0, + "files_url": "https://github.com/huggingface/transformers/pull/44764/files", + "html_url": "https://github.com/huggingface/transformers/pull/44764", + "labels": [], + "merged": true, + "number": 44764, + "review_comments_count": 2, "state": "closed", - "title": "Add position_ids to RoFormerForCausalLM forward pass", - "updated_at": "2026-03-18T13:40:05Z" + "title": "fix: sig lip import", + "updated_at": "2026-03-16T17:38:41Z" }, { - "additions": 26, - "author": "vasqu", + "additions": 17, + "author": "xenova", "author_association": "MEMBER", - "body_excerpt": "As per title, it seems that the `cute` subfolder can be even distributed if you only install FA2 which implies something wrong. Now we check under the (normalized) distribution names", - "changed_files": 2, + "body_excerpt": "# What does this PR do? Adds support for MLP mixers, used by [nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16). Previously, it would crash because it would not recognize the `-` char in t\u2026", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44703", - "created_at": "2026-03-14T14:46:02Z", - "deletions": 10, + "comments_count": 7, + "conversation_url": "https://github.com/huggingface/transformers/pull/44763", + "created_at": "2026-03-16T17:04:36Z", + "deletions": 5, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44703/files", - "html_url": "https://github.com/huggingface/transformers/pull/44703", + "files_url": "https://github.com/huggingface/transformers/pull/44763/files", + "html_url": "https://github.com/huggingface/transformers/pull/44763", "labels": [], - "merged": true, - "number": 44703, + "merged": false, + "number": 44763, "review_comments_count": 1, "state": "closed", - "title": "[`FA`] Fix fa detection", - "updated_at": "2026-03-14T17:19:07Z" + "title": "[nemotron_h] Add support for MLP mixers", + "updated_at": "2026-04-14T13:46:14Z" }, { - "additions": 148, - "author": "LincolnBurrows2017", + "additions": 4, + "author": "BillionClaw", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR fix? The `rms_norm_eps` parameter in `MistralConfig` was incorrectly typed as `int | None` but defaults to `1e-6` which is a float. This parameter is passed to `MistralRMSNorm` which expects `eps: float`. ### Bug Detai\u2026", - "changed_files": 8, + "body_excerpt": "XLNet.relative_positional_encoding creates intermediate tensors on CPU every forward pass because torch.arange was missing the device parameter. This causes unnecessary CPU-GPU transfers when running on CUDA. Added device=self.device to al\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44702", - "created_at": "2026-03-14T14:41:15Z", - "deletions": 25, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44762", + "created_at": "2026-03-16T16:17:54Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44702/files", - "html_url": "https://github.com/huggingface/transformers/pull/44702", + "files_url": "https://github.com/huggingface/transformers/pull/44762/files", + "html_url": "https://github.com/huggingface/transformers/pull/44762", "labels": [ "Code agent slop" ], "merged": false, - "number": 44702, + "number": 44762, "review_comments_count": 0, "state": "closed", - "title": "fix: Correct rms_norm_eps type hint from int to float in MistralConfig", - "updated_at": "2026-03-18T13:00:12Z" + "title": "fix: Cache XLNet relative_positional_encoding to avoid CPU computation", + "updated_at": "2026-03-18T15:16:14Z" }, { - "additions": 219, - "author": "hmellor", + "additions": 152, + "author": "tarekziade", "author_association": "MEMBER", - "body_excerpt": "These models have `base_model_pp_plan`s but currently do not work because the base model's forward pass depends on all the `layers` being `Qwen2VLDecoderLayer`. i.e. if one of the layers is removed/replaced with `Identity`, `decoder_layer.\u2026", - "changed_files": 52, + "body_excerpt": "# What does this PR do? This adds rule 10: ``` Direct config definitions must use @strict(accept_kwargs=True). ```", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44699", - "created_at": "2026-03-14T11:44:24Z", - "deletions": 148, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44761", + "created_at": "2026-03-16T16:05:03Z", + "deletions": 7, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44699/files", - "html_url": "https://github.com/huggingface/transformers/pull/44699", + "files_url": "https://github.com/huggingface/transformers/pull/44761/files", + "html_url": "https://github.com/huggingface/transformers/pull/44761", "labels": [], "merged": true, - "number": 44699, - "review_comments_count": 0, + "number": 44761, + "review_comments_count": 7, "state": "closed", - "title": "Fix several based models' pipeline parallel support", - "updated_at": "2026-03-20T13:53:27Z" + "title": "model-linter: Added rule 10", + "updated_at": "2026-03-17T08:52:19Z" }, { - "additions": 1, - "author": "hmellor", - "author_association": "MEMBER", - "body_excerpt": "The typo in the `elif` chain meant that `image` and `video` modalidty encoders could not be set using this method. This PR fixes the typo so that they can.", - "changed_files": 1, + "additions": 2090, + "author": "juliendenize", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? save locally --> local locally) ```\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44684", - "created_at": "2026-03-13T20:16:35Z", - "deletions": 5, + "conversation_url": "https://github.com/huggingface/transformers/pull/44730", + "created_at": "2026-03-15T20:44:32Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44684/files", - "html_url": "https://github.com/huggingface/transformers/pull/44684", + "files_url": "https://github.com/huggingface/transformers/pull/44730/files", + "html_url": "https://github.com/huggingface/transformers/pull/44730", "labels": [], "merged": true, - "number": 44684, - "review_comments_count": 8, + "number": 44730, + "review_comments_count": 6, "state": "closed", - "title": "update flex attention to use `return_aux` instead of `return_lse` when torch verison >= 2.9", - "updated_at": "2026-03-18T11:44:18Z" + "title": "Fix `mlcd` auto config/model/mapping issues", + "updated_at": "2026-03-16T12:12:30Z" }, { - "additions": 301, - "author": "SunMarc", + "additions": 214, + "author": "xenova", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Llama cpp integration in transformers serve. Minor changes to add llama.cpp integration Mostly changes on serve to fix latency for streaming and non streaming", - "changed_files": 2, + "body_excerpt": "# What does this PR do? This PR introduces a helper utility function, `int_div_ceil`, which performs `math.ceil(a / b)` for non-negative integer operands. This is necessary as the current approach is both error-prone and imprecise (especia\u2026", + "changed_files": 58, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44682", - "created_at": "2026-03-13T18:52:41Z", - "deletions": 73, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44729", + "created_at": "2026-03-15T20:29:38Z", + "deletions": 225, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44682/files", - "html_url": "https://github.com/huggingface/transformers/pull/44682", + "files_url": "https://github.com/huggingface/transformers/pull/44729/files", + "html_url": "https://github.com/huggingface/transformers/pull/44729", "labels": [], "merged": false, - "number": 44682, + "number": 44729, "review_comments_count": 0, "state": "open", - "title": "transformers serve + llamacpp", - "updated_at": "2026-03-14T07:05:29Z" + "title": "Avoid floating point math for ceil operations", + "updated_at": "2026-03-15T20:49:34Z" }, { - "additions": 47, - "author": "dacorvo", - "author_association": "MEMBER", - "body_excerpt": "Fixes #44679 ## Summary - Custom attention kernels registered via `load_and_register_attn_kernel` currently get hardcoded `flash_attention_2` mask dispatch, which produces 2D or `None` masks - Kernels that need SDPA-style 4D boolean masks\u2026", + "additions": 88, + "author": "ajmeese7", + "author_association": "NONE", + "body_excerpt": "# What does this PR do? Fixes a GPU memory leak in `Bnb4bitQuantize.convert()` where float16 source tensors are never freed during 4-bit quantized model loading via `from_pretrained`, causing OOM on models whose float16 size exceeds GPU VR\u2026", "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44680", - "created_at": "2026-03-13T17:55:54Z", + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/44728", + "created_at": "2026-03-15T19:56:44Z", "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44680/files", - "html_url": "https://github.com/huggingface/transformers/pull/44680", + "files_url": "https://github.com/huggingface/transformers/pull/44728/files", + "html_url": "https://github.com/huggingface/transformers/pull/44728", "labels": [], "merged": false, - "number": 44680, - "review_comments_count": 12, - "state": "open", - "title": "Allow kernel modules to declare their preferred mask function", - "updated_at": "2026-04-14T19:29:06Z" + "number": 44728, + "review_comments_count": 0, + "state": "closed", + "title": "Fix float16 memory leak during 4-bit quantized model loading", + "updated_at": "2026-03-16T20:53:54Z" }, { - "additions": 9, - "author": "JokeYoonic", + "additions": 202, + "author": "LincolnBurrows2017", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Problem: - On macOS ARM64 + Python 3.13 + transformers 5.x, GPT-2 model's lm_head forward pass produces NaN/Inf values during inference - Root cause: lm_head.weight is tied to transformer.wte.weight, and the shared memory reference causes\u2026", - "changed_files": 1, + "body_excerpt": "Fixed issue where kwargs like force_download, proxies, token were not being passed to cached_file function.", + "changed_files": 11, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44676", - "created_at": "2026-03-13T16:28:01Z", - "deletions": 2, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44727", + "created_at": "2026-03-15T19:41:24Z", + "deletions": 33, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44676/files", - "html_url": "https://github.com/huggingface/transformers/pull/44676", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44727/files", + "html_url": "https://github.com/huggingface/transformers/pull/44727", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44676, + "number": 44727, "review_comments_count": 0, - "state": "open", - "title": "fix(gpt2): Resolve NaN/Inf issue in lm_head on Python 3.13 with tied weights", - "updated_at": "2026-03-18T17:16:49Z" + "state": "closed", + "title": "fix: AutoProcessor.from_pretrained not passing kwargs to cached_file", + "updated_at": "2026-03-18T13:15:46Z" }, { - "additions": 32, - "author": "stevhliu", - "author_association": "MEMBER", - "body_excerpt": "properly formats the `ContinuousBatchingConfig` below: \"Screenshot", - "changed_files": 1, + "additions": 198, + "author": "LincolnBurrows2017", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "Replaced bare except clause with except Exception in _safe_convert_tensor function to follow Python best practices (PEP 8).", + "changed_files": 10, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44675", - "created_at": "2026-03-13T16:10:28Z", - "deletions": 14, + "conversation_url": "https://github.com/huggingface/transformers/pull/44725", + "created_at": "2026-03-15T17:41:18Z", + "deletions": 29, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44675/files", - "html_url": "https://github.com/huggingface/transformers/pull/44675", - "labels": [], - "merged": true, - "number": 44675, + "files_url": "https://github.com/huggingface/transformers/pull/44725/files", + "html_url": "https://github.com/huggingface/transformers/pull/44725", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44725, "review_comments_count": 0, "state": "closed", - "title": "[docs] cb config", - "updated_at": "2026-03-13T23:15:04Z" + "title": "fix: replace bare except with Exception in Fuyu image processing", + "updated_at": "2026-03-18T13:16:22Z" }, { - "additions": 408, - "author": "Rocketknight1", + "additions": 6, + "author": "ydshieh", "author_association": "MEMBER", - "body_excerpt": "We've had `parse_response()` in the library for a while, but it's been a soft launch / prototype feature. This PR cleans it up and documents it, making it an official feature! The API is largely unchanged from the prototype, but we drop `x\u2026", + "body_excerpt": "# What does this PR do? TO be explained.", "changed_files": 5, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44674", - "created_at": "2026-03-13T15:41:42Z", - "deletions": 34, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44724", + "created_at": "2026-03-15T17:14:12Z", + "deletions": 5, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/44724/files", + "html_url": "https://github.com/huggingface/transformers/pull/44724", + "labels": [], + "merged": false, + "number": 44724, + "review_comments_count": 1, + "state": "open", + "title": "Fix some missing / incorrect entries in auto files", + "updated_at": "2026-03-16T09:59:56Z" + }, + { + "additions": 12, + "author": "aashirpersonal", + "author_association": "NONE", + "body_excerpt": "## Summary This PR fixes #44716 by exposing and forwarding `interpolate_pos_encoding` through the Pixio embedding/model call chain so the option is actually usable from `PixioModel.forward()`. ### Changes - Added `interpolate_pos_encoding:\u2026", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44723", + "created_at": "2026-03-15T16:52:03Z", + "deletions": 6, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44723/files", + "html_url": "https://github.com/huggingface/transformers/pull/44723", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44723, + "review_comments_count": 0, + "state": "closed", + "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel", + "updated_at": "2026-03-18T15:05:52Z" + }, + { + "additions": 38, + "author": "chandan11248", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## What does this PR do? Migrates the GPT-J model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as described in #43979. ### Changes - Added `_can_record_outputs` to `GPTJPreTrained\u2026", + "changed_files": 2, + "cluster_id": "cluster-43979-11", + "cluster_ids": [ + "cluster-43979-11" + ], + "cluster_role": "member", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44722", + "created_at": "2026-03-15T15:33:25Z", + "deletions": 110, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44674/files", - "html_url": "https://github.com/huggingface/transformers/pull/44674", + "files_url": "https://github.com/huggingface/transformers/pull/44722/files", + "html_url": "https://github.com/huggingface/transformers/pull/44722", "labels": [], - "merged": true, - "number": 44674, - "review_comments_count": 11, - "state": "closed", - "title": "Officially launch parse_response", - "updated_at": "2026-03-24T15:55:05Z" + "merged": false, + "number": 44722, + "review_comments_count": 0, + "state": "open", + "title": "Refactor gptj output tracing to use standardized decorators", + "updated_at": "2026-03-19T18:12:59Z" }, { - "additions": 73, - "author": "remi-or", - "author_association": "MEMBER", - "body_excerpt": "This PR fixes a bug in continuous batching where non-CUDA devices cannot use the feature because some CUDA-exclusive objects are always instantiated. It also adds a test to make sure this will not break again in the future.", - "changed_files": 3, + "additions": 4, + "author": "rsmed31", + "author_association": "NONE", + "body_excerpt": "## Summary Fixes #44716 `PixioPatchEmbeddings.forward` already accepted `interpolate_pos_encoding` but it was silently dropped \u2014 never passed from `PixioEmbeddings.forward` or `PixioModel.forward`, making the parameter effectively unusable\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44673", - "created_at": "2026-03-13T15:37:01Z", - "deletions": 15, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44718", + "created_at": "2026-03-14T23:57:14Z", + "deletions": 3, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44673/files", - "html_url": "https://github.com/huggingface/transformers/pull/44673", + "files_url": "https://github.com/huggingface/transformers/pull/44718/files", + "html_url": "https://github.com/huggingface/transformers/pull/44718", "labels": [], - "merged": true, - "number": 44673, + "merged": false, + "number": 44718, "review_comments_count": 0, "state": "closed", - "title": "[CB] [Bug] Fix crashes when running without cuda", - "updated_at": "2026-03-15T23:59:55Z" + "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel", + "updated_at": "2026-03-15T17:58:58Z" }, { - "additions": 1, - "author": "neo", - "author_association": "CONTRIBUTOR", - "body_excerpt": "# What does this PR do? modular doesn't properly convert some files (e.g. kyutai) Also fixes red CI on main", - "changed_files": 1, + "additions": 8, + "author": "haosenwang1018", + "author_association": "NONE", + "body_excerpt": "Replace bare `except:` clauses with `except Exception:` for PEP 8 compliance.", + "changed_files": 4, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44283", - "created_at": "2026-02-25T18:33:17Z", - "deletions": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44312", + "created_at": "2026-02-27T01:00:33Z", + "deletions": 8, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44283/files", - "html_url": "https://github.com/huggingface/transformers/pull/44283", + "files_url": "https://github.com/huggingface/transformers/pull/44312/files", + "html_url": "https://github.com/huggingface/transformers/pull/44312", "labels": [], - "merged": true, - "number": 44283, + "merged": false, + "number": 44312, "review_comments_count": 0, "state": "closed", - "title": "[`Modular`] Fix file type regression", - "updated_at": "2026-02-25T20:04:41Z" + "title": "fix: replace 8 bare except clauses with except Exception", + "updated_at": "2026-02-27T03:27:27Z" }, { - "additions": 5, - "author": "Rocketknight1", - "author_association": "MEMBER", - "body_excerpt": "Response schema save-loading was broken in #40936, this PR restores it! I did most of this in #42300 but missed an issue with loading/saving.", - "changed_files": 1, + "additions": 38, + "author": "onel", + "author_association": "NONE", + "body_excerpt": "# What does this PR do? Adds species bias documentation across the transformers repository to help model authors and users recognize and address potential biases in language models. The updates include guidance on documenting bias categori\u2026", + "changed_files": 5, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44282", - "created_at": "2026-02-25T17:57:54Z", + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44311", + "created_at": "2026-02-27T00:02:49Z", "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44282/files", - "html_url": "https://github.com/huggingface/transformers/pull/44282", + "files_url": "https://github.com/huggingface/transformers/pull/44311/files", + "html_url": "https://github.com/huggingface/transformers/pull/44311", "labels": [], - "merged": true, - "number": 44282, + "merged": false, + "number": 44311, "review_comments_count": 0, "state": "closed", - "title": "Restore response_schema saving-loading", - "updated_at": "2026-02-25T18:27:22Z" + "title": "Add species bias documentation to model cards and docs", + "updated_at": "2026-02-27T14:09:20Z" }, { - "additions": 1, - "author": "ArthurZucker", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Its a very small fix for #44062", - "changed_files": 1, + "additions": 63, + "author": "onel", + "author_association": "NONE", + "body_excerpt": "# What does this PR do? Adds documentation for Pruna AI integration to the Transformers ecosystem, following the existing pattern used by vLLM and Unsloth integration docs. ## Changes - Created `docs/source/en/community_integrations/pruna.\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44281", - "created_at": "2026-02-25T16:28:37Z", + "conversation_url": "https://github.com/huggingface/transformers/pull/44310", + "created_at": "2026-02-27T00:00:31Z", "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44281/files", - "html_url": "https://github.com/huggingface/transformers/pull/44281", - "labels": [], - "merged": true, - "number": 44281, + "files_url": "https://github.com/huggingface/transformers/pull/44310/files", + "html_url": "https://github.com/huggingface/transformers/pull/44310", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44310, "review_comments_count": 0, "state": "closed", - "title": "Fix special token maps BC", - "updated_at": "2026-02-26T10:34:17Z" + "title": "docs: Add Pruna AI integration documentation", + "updated_at": "2026-02-27T14:08:21Z" }, { - "additions": 614, - "author": "RishabhMehra", - "author_association": "FIRST_TIMER", - "body_excerpt": "# What does this PR do? - Adds an opt-in use_fast_grouping flag to TokenClassificationPipeline to enable a NumPy-vectorised BIO grouping path (~5\u00d7 faster on long sequences) while keeping the legacy path as default. - Improves correctness:\u2026", - "changed_files": 3, + "additions": 129, + "author": "onel", + "author_association": "NONE", + "body_excerpt": "# What does this PR do? Adds SkyPilot deployment documentation to the DeepSpeed guide. The new section includes: - Introduction to SkyPilot as a unified framework for running AI workloads across clouds and Kubernetes - Complete example YAM\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/44278", - "created_at": "2026-02-25T12:49:56Z", - "deletions": 63, + "conversation_url": "https://github.com/huggingface/transformers/pull/44309", + "created_at": "2026-02-26T22:44:41Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44278/files", - "html_url": "https://github.com/huggingface/transformers/pull/44278", + "files_url": "https://github.com/huggingface/transformers/pull/44309/files", + "html_url": "https://github.com/huggingface/transformers/pull/44309", "labels": [ "Code agent slop" ], "merged": false, - "number": 44278, + "number": 44309, "review_comments_count": 0, "state": "closed", - "title": "[FEAT] Pipelines - Faster group_entities", - "updated_at": "2026-02-25T13:54:58Z" + "title": "Add SkyPilot deployment documentation to DeepSpeed guide", + "updated_at": "2026-02-27T14:09:55Z" }, { - "additions": 105, - "author": "tarekziade", + "additions": 5854, + "author": "NielsRogge", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? This patch makes the GLM-ASR doc example runnable by using `runnables` - see https://github.com/huggingface/doc-builder/blob/main/docs/runnable-code-blocks.md", - "changed_files": 5, + "body_excerpt": "# What does this PR do? needs a test", - "changed_files": 36, + "body_excerpt": "# What does this PR do? modular doesn't properly convert some files (e.g. kyutai) Also fixes red CI on main", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44253", - "created_at": "2026-02-24T13:03:40Z", - "deletions": 15, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44283", + "created_at": "2026-02-25T18:33:17Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44253/files", - "html_url": "https://github.com/huggingface/transformers/pull/44253", + "files_url": "https://github.com/huggingface/transformers/pull/44283/files", + "html_url": "https://github.com/huggingface/transformers/pull/44283", "labels": [], "merged": true, - "number": 44253, + "number": 44283, "review_comments_count": 0, "state": "closed", - "title": "Speed create_import_structure up with os.scandir()", - "updated_at": "2026-03-10T12:49:42Z" + "title": "[`Modular`] Fix file type regression", + "updated_at": "2026-02-25T20:04:41Z" }, { - "additions": 718, - "author": "zucchini-nlp", + "additions": 5, + "author": "Rocketknight1", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Deprecate timm backbone in favor of keeping all models within one `timm` folder, similar to other vision models. A backbone is just a variation of `PreTrainedModel`", - "changed_files": 61, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 8, - "conversation_url": "https://github.com/huggingface/transformers/pull/44252", - "created_at": "2026-02-24T13:00:59Z", - "deletions": 772, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44252/files", - "html_url": "https://github.com/huggingface/transformers/pull/44252", - "labels": [], - "merged": false, - "number": 44252, - "review_comments_count": 9, - "state": "open", - "title": "Timm unification continued", - "updated_at": "2026-02-26T13:35:44Z" - }, - { - "additions": 1951, - "author": "Sai-Suraj-27", - "author_association": "CONTRIBUTOR", - "body_excerpt": "# What does this PR do? Model Page: https://huggingface.co/jinaai/jina-embeddings-v3 Model Paper: https://huggingface.co/papers/2409.10173 Downloads last month > **5.3M** Completes Part of https://github.com/huggingface/transformers/issues\u2026", - "changed_files": 13, + "body_excerpt": "Response schema save-loading was broken in #40936, this PR restores it! I did most of this in #42300 but missed an issue with loading/saving.", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 29, - "conversation_url": "https://github.com/huggingface/transformers/pull/44251", - "created_at": "2026-02-24T12:56:24Z", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44282", + "created_at": "2026-02-25T17:57:54Z", "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44251/files", - "html_url": "https://github.com/huggingface/transformers/pull/44251", - "labels": [ - "New model" - ], + "files_url": "https://github.com/huggingface/transformers/pull/44282/files", + "html_url": "https://github.com/huggingface/transformers/pull/44282", + "labels": [], "merged": true, - "number": 44251, - "review_comments_count": 74, + "number": 44282, + "review_comments_count": 0, "state": "closed", - "title": "Add `Jina-Embeddings-V3` Model", - "updated_at": "2026-03-19T10:07:57Z" + "title": "Restore response_schema saving-loading", + "updated_at": "2026-02-25T18:27:22Z" }, { - "additions": 5, - "author": "SunMarc", + "additions": 1, + "author": "ArthurZucker", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? This PR fixes https://github.com/huggingface/transformers/pull/43806#discussion_r2834269455. We removed `self.report_to == \"all\"` functionality by mistake. Adding it back !", + "body_excerpt": "# What does this PR do? Its a very small fix for #44062", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44250", - "created_at": "2026-02-24T12:38:21Z", - "deletions": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44281", + "created_at": "2026-02-25T16:28:37Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44250/files", - "html_url": "https://github.com/huggingface/transformers/pull/44250", + "files_url": "https://github.com/huggingface/transformers/pull/44281/files", + "html_url": "https://github.com/huggingface/transformers/pull/44281", "labels": [], "merged": true, - "number": 44250, + "number": 44281, "review_comments_count": 0, "state": "closed", - "title": "fix regression report_to \"all\"", - "updated_at": "2026-02-24T12:55:06Z" + "title": "Fix special token maps BC", + "updated_at": "2026-02-26T10:34:17Z" }, { - "additions": 9, - "author": "Ryan-J-MAX", - "author_association": "NONE", - "body_excerpt": "## What does this PR fix? This PR adds backward compatibility for the deprecated `grouped_entities` parameter in the `TokenClassificationPipeline`. ## Problem The `grouped_entities` parameter was deprecated in favor of `aggregation_strateg\u2026", - "changed_files": 1, + "additions": 614, + "author": "RishabhMehra", + "author_association": "FIRST_TIMER", + "body_excerpt": "# What does this PR do? - Adds an opt-in use_fast_grouping flag to TokenClassificationPipeline to enable a NumPy-vectorised BIO grouping path (~5\u00d7 faster on long sequences) while keeping the legacy path as default. - Improves correctness:\u2026", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/44249", - "created_at": "2026-02-24T10:48:54Z", - "deletions": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44278", + "created_at": "2026-02-25T12:49:56Z", + "deletions": 63, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44249/files", - "html_url": "https://github.com/huggingface/transformers/pull/44249", + "files_url": "https://github.com/huggingface/transformers/pull/44278/files", + "html_url": "https://github.com/huggingface/transformers/pull/44278", "labels": [ "Code agent slop" ], "merged": false, - "number": 44249, + "number": 44278, "review_comments_count": 0, "state": "closed", - "title": "fix: add backward compatibility for grouped_entities parameter", - "updated_at": "2026-02-24T12:31:26Z" + "title": "[FEAT] Pipelines - Faster group_entities", + "updated_at": "2026-02-25T13:54:58Z" }, { - "additions": 12, - "author": "yonigozlan", + "additions": 105, + "author": "tarekziade", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Fix backward compatibility with remote code for old processors not defining valid_kwargs (e.g. phi4) Cc @zucchini-nlp Fix `test_processor_override` for phi3v and phi4 in vllm @hmellor", - "changed_files": 1, + "body_excerpt": "# What does this PR do? This patch makes the GLM-ASR doc example runnable by using `runnables` - see https://github.com/huggingface/doc-builder/blob/main/docs/runnable-code-blocks.md", + "changed_files": 5, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44245", - "created_at": "2026-02-23T21:47:19Z", - "deletions": 4, + "comments_count": 36, + "conversation_url": "https://github.com/huggingface/transformers/pull/44277", + "created_at": "2026-02-25T08:49:20Z", + "deletions": 19, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44245/files", - "html_url": "https://github.com/huggingface/transformers/pull/44245", + "files_url": "https://github.com/huggingface/transformers/pull/44277/files", + "html_url": "https://github.com/huggingface/transformers/pull/44277", "labels": [], "merged": true, - "number": 44245, - "review_comments_count": 3, + "number": 44277, + "review_comments_count": 6, "state": "closed", - "title": "Fix image processors `from_dict` backward compatibility with old remote code", - "updated_at": "2026-02-24T15:17:37Z" + "title": "Use doc-builder runnable example for GLM-ASR", + "updated_at": "2026-04-02T16:16:55Z" }, { - "additions": 63, - "author": "thakoreh", + "additions": 0, + "author": "vishalpatil-45", "author_association": "NONE", - "body_excerpt": "## Summary Fixes #44242 Load balancing loss was not being added when `output_router_logits=False` in Mixtral models. ## Changes - Fixed loss calculation to include load balancing even when router logits are not output - Added test case ##\u2026", - "changed_files": 2, + "body_excerpt": "# What does this PR do? This PR addresses the performance regression where `import transformers` takes ~3.5s. The issue was caused by eager imports of heavy backend libraries (like torch/numpy) during the initial module load. By moving the\u2026", + "changed_files": 0, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44243", - "created_at": "2026-02-23T21:27:09Z", - "deletions": 5, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44275", + "created_at": "2026-02-25T08:27:32Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44243/files", - "html_url": "https://github.com/huggingface/transformers/pull/44243", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44275/files", + "html_url": "https://github.com/huggingface/transformers/pull/44275", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44243, + "number": 44275, "review_comments_count": 0, "state": "closed", - "title": "fix: add load balancing loss when output_router_logits=False", - "updated_at": "2026-02-23T21:54:11Z" + "title": "[Fix] Restore lazy loading to improve import performance (#44273)", + "updated_at": "2026-02-25T20:37:18Z" }, { - "additions": 9, - "author": "yushiran", - "author_association": "CONTRIBUTOR", - "body_excerpt": "## Summary Adds missing `-> bool`, `-> int`, and `-> str | None` return type annotations to public utility functions in `utils/generic.py`, making them consistent with the newer `is_timm_config_dict` and `is_timm_local_checkpoint` function\u2026", - "changed_files": 1, + "additions": 559, + "author": "paipeline", + "author_association": "NONE", + "body_excerpt": "## Description Fixes #44242 This PR resolves an issue where the auxiliary load balancing loss was not computed when `output_router_logits=False`, even when `router_aux_loss_coef != 0`. ## Problem The auxiliary loss computation was incorrec\u2026", + "changed_files": 6, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44241", - "created_at": "2026-02-23T19:50:05Z", - "deletions": 9, + "conversation_url": "https://github.com/huggingface/transformers/pull/44274", + "created_at": "2026-02-25T06:38:02Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44241/files", - "html_url": "https://github.com/huggingface/transformers/pull/44241", - "labels": [], - "merged": true, - "number": 44241, + "files_url": "https://github.com/huggingface/transformers/pull/44274/files", + "html_url": "https://github.com/huggingface/transformers/pull/44274", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44274, "review_comments_count": 0, "state": "closed", - "title": "fix: add missing return type annotations to type-checking utilities in generic.py", - "updated_at": "2026-02-24T13:27:11Z" + "title": "Fix auxiliary load balancing loss computation when output_router_logits=False", + "updated_at": "2026-02-25T13:36:03Z" }, { - "additions": 2, - "author": "tarekziade", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Makes sure `find_bad_commit` always return the result `dict`", + "additions": 1, + "author": "hangjun-ezra", + "author_association": "CONTRIBUTOR", + "body_excerpt": "## What does this PR do? Fixes a `TypeError: unsupported operand type(s) for |: 'list' and 'set'` in `RotaryEmbeddingConfigMixin.convert_rope_params_to_dict` when `ignore_keys_at_rope_validation` is a `list` instead of a `set`. ### Root ca\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44240", - "created_at": "2026-02-23T19:12:49Z", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44272", + "created_at": "2026-02-25T03:52:04Z", "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44240/files", - "html_url": "https://github.com/huggingface/transformers/pull/44240", + "files_url": "https://github.com/huggingface/transformers/pull/44272/files", + "html_url": "https://github.com/huggingface/transformers/pull/44272", "labels": [], "merged": true, - "number": 44240, + "number": 44272, "review_comments_count": 0, "state": "closed", - "title": "Fix return value - fixes #44238", - "updated_at": "2026-02-24T13:02:59Z" + "title": "Fix TypeError in convert_rope_params_to_dict when ignore_keys is a list", + "updated_at": "2026-02-25T14:38:36Z" }, { - "additions": 253, - "author": "stevhliu", - "author_association": "MEMBER", - "body_excerpt": "part 2 of refactoring the training docs adds new dedicated guide to callbacks and data collators todo: - [x] backlink to `## Next steps` in `trainer.md` once https://github.com/huggingface/transformers/pull/44185 is merged", - "changed_files": 7, + "additions": 1272, + "author": "balak4", + "author_association": "CONTRIBUTOR", + "body_excerpt": "## Summary - Add GreedyLR, a metric-based adaptive learning rate scheduler that adjusts the learning rate during training based on the current loss - Based on [\"Dynamic Learning Rate Scheduling based on Loss Changes Leads to Faster Converg\u2026", + "changed_files": 10, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44239", - "created_at": "2026-02-23T18:54:55Z", - "deletions": 47, + "comments_count": 9, + "conversation_url": "https://github.com/huggingface/transformers/pull/44271", + "created_at": "2026-02-25T01:40:57Z", + "deletions": 7, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44239/files", - "html_url": "https://github.com/huggingface/transformers/pull/44239", + "files_url": "https://github.com/huggingface/transformers/pull/44271/files", + "html_url": "https://github.com/huggingface/transformers/pull/44271", "labels": [], "merged": true, - "number": 44239, - "review_comments_count": 10, + "number": 44271, + "review_comments_count": 3, "state": "closed", - "title": "[docs] callbacks and collators", - "updated_at": "2026-02-24T22:12:46Z" + "title": "Add GreedyLR adaptive learning rate scheduler", + "updated_at": "2026-03-18T18:45:46Z" }, { - "additions": 1, - "author": "eustlb", + "additions": 88, + "author": "yonigozlan", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? small nit but will be misleading if not fixed", - "changed_files": 1, + "body_excerpt": "# What does this PR do? A lot of ProcessorsKwargs have incorrect/unspecified type hints in their ProcessorsKwargs TypedDict for their images_kwargs attribute. Functionnaly, this did not cause issues as \"_merge_kwargs\" automatically picks u\u2026", + "changed_files": 44, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44237", - "created_at": "2026-02-23T17:52:17Z", - "deletions": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44270", + "created_at": "2026-02-25T00:11:31Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44237/files", - "html_url": "https://github.com/huggingface/transformers/pull/44237", + "files_url": "https://github.com/huggingface/transformers/pull/44270/files", + "html_url": "https://github.com/huggingface/transformers/pull/44270", "labels": [], - "merged": true, - "number": 44237, + "merged": false, + "number": 44270, "review_comments_count": 0, - "state": "closed", - "title": "[mimi] nit", - "updated_at": "2026-02-24T15:43:55Z" + "state": "open", + "title": "Add correct typing to custom images_kwargs in ProcessorsKwargs", + "updated_at": "2026-02-25T01:12:06Z" }, { - "additions": 109, - "author": "SunMarc", + "additions": 30, + "author": "yonigozlan", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/43847 When using zero3 + from_config, the model was incorrectly initialized as we were not gathering the params. Added a test also. cc @tohtana", - "changed_files": 5, + "body_excerpt": "# What does this PR do? This is a follow-up to https://github.com/huggingface/transformers/pull/43748, and will allow to have clickable links to the full modality kwargs when present in the docstring of a processor or image processor Cc @s\u2026", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44236", - "created_at": "2026-02-23T17:20:01Z", - "deletions": 3, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44269", + "created_at": "2026-02-25T00:05:47Z", + "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44236/files", - "html_url": "https://github.com/huggingface/transformers/pull/44236", + "files_url": "https://github.com/huggingface/transformers/pull/44269/files", + "html_url": "https://github.com/huggingface/transformers/pull/44269", "labels": [], "merged": true, - "number": 44236, + "number": 44269, "review_comments_count": 0, "state": "closed", - "title": "fix zero3 init config", - "updated_at": "2026-02-27T11:36:19Z" + "title": "Add `ProcessingKwargs` `ImagesKwargs` etc. to docs", + "updated_at": "2026-02-27T19:03:15Z" }, { - "additions": 1, - "author": "itazap", - "author_association": "MEMBER", - "body_excerpt": "UPDATE TO: https://github.com/huggingface/transformers/pull/44179/changes Models with incorrect tokenizer_class in tokenization_config.json that should use TokenziersBackend", + "additions": 5, + "author": "ethanknights", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? Some improvements to the `trainer.py` docs. ## Before submitting - [x] This PR fixes a typo or improves the docs. ## Who can review? Documentation: @stevhliu", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 5, - "conversation_url": "https://github.com/huggingface/transformers/pull/44235", - "created_at": "2026-02-23T17:06:54Z", - "deletions": 0, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44268", + "created_at": "2026-02-24T23:20:16Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44235/files", - "html_url": "https://github.com/huggingface/transformers/pull/44235", + "files_url": "https://github.com/huggingface/transformers/pull/44268/files", + "html_url": "https://github.com/huggingface/transformers/pull/44268", "labels": [], "merged": true, - "number": 44235, + "number": 44268, "review_comments_count": 0, "state": "closed", - "title": "update fuyu tokenizer class", - "updated_at": "2026-02-23T17:36:22Z" + "title": "chore: fixes in `Trainer` class docs (`compute_loss` & `hyperparameter_search`)", + "updated_at": "2026-02-26T00:50:23Z" }, { - "additions": 249, - "author": "yonigozlan", - "author_association": "MEMBER", - "body_excerpt": "Cc @zucchini-nlp", - "changed_files": 3, + "additions": 4, + "author": "manavshrivastavagit", + "author_association": "NONE", + "body_excerpt": "## Summary - Update the `DocumentQuestionAnsweringPipeline` docstring to explicitly mention the task summary in the Transformers documentation. - Remove the stale TODO comment now that document question answering is covered in the task sum\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44234", - "created_at": "2026-02-23T17:03:05Z", - "deletions": 55, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44267", + "created_at": "2026-02-24T20:35:18Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44234/files", - "html_url": "https://github.com/huggingface/transformers/pull/44234", - "labels": [], - "merged": true, - "number": 44234, - "review_comments_count": 2, + "files_url": "https://github.com/huggingface/transformers/pull/44267/files", + "html_url": "https://github.com/huggingface/transformers/pull/44267", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44267, + "review_comments_count": 0, "state": "closed", - "title": "Add processing tests for phi4 multimodal", - "updated_at": "2026-02-23T22:08:11Z" + "title": "Docs: point DocumentQuestionAnswering pipeline to task summary", + "updated_at": "2026-02-25T13:34:48Z" }, { - "additions": 219, - "author": "tarekziade", - "author_association": "MEMBER", - "body_excerpt": "Extends `ty` coverage to `src/transformers/generation` - Added a dedicated type-check wrapper script: `utils/check_types.py`. - Updated `Makefile` to run `ty` checks through the wrapper in both `style` and `check-repo`. - merged all typing\u2026", - "changed_files": 15, + "additions": 27, + "author": "harshaljanjani", + "author_association": "CONTRIBUTOR", + "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 **Reasoning:** The impact of this fix goes beyond `Mask2Former` and `DeformableDetr` and should fix any model that uses `torch_compilable_check`. Most use\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 12, - "conversation_url": "https://github.com/huggingface/transformers/pull/44233", - "created_at": "2026-02-23T16:23:24Z", - "deletions": 101, + "comments_count": 8, + "conversation_url": "https://github.com/huggingface/transformers/pull/44266", + "created_at": "2026-02-24T20:02:06Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44233/files", - "html_url": "https://github.com/huggingface/transformers/pull/44233", + "files_url": "https://github.com/huggingface/transformers/pull/44266/files", + "html_url": "https://github.com/huggingface/transformers/pull/44266", "labels": [], "merged": true, - "number": 44233, - "review_comments_count": 33, + "number": 44266, + "review_comments_count": 0, "state": "closed", - "title": "chore(typing): Add type checking to `src/transformers/generation`", - "updated_at": "2026-03-04T17:24:37Z" + "title": "fix(utils): Make torch_compilable_check compatible with torch.export strict mode", + "updated_at": "2026-02-26T09:42:47Z" }, { - "additions": 11, - "author": "tarekziade", + "additions": 90, + "author": "vasqu", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? per https://code.claude.com/docs/en/claude-code-on-the-web#best-practices `CLAUDE.md` can alias directly into `AGENTS.md`", - "changed_files": 2, + "body_excerpt": "As per title, WIP --> needs a test", + "changed_files": 36, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44232", - "created_at": "2026-02-23T16:10:15Z", - "deletions": 109, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44232/files", - "html_url": "https://github.com/huggingface/transformers/pull/44232", + "conversation_url": "https://github.com/huggingface/transformers/pull/44264", + "created_at": "2026-02-24T18:06:58Z", + "deletions": 210, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/44264/files", + "html_url": "https://github.com/huggingface/transformers/pull/44264", "labels": [], - "merged": true, - "number": 44232, - "review_comments_count": 2, - "state": "closed", - "title": "chore: added CLAUDE.md alias", - "updated_at": "2026-02-24T14:48:36Z" + "merged": false, + "number": 44264, + "review_comments_count": 3, + "state": "open", + "title": "[`Moe`] Enable aux loss automatically when in training + coef is not 0", + "updated_at": "2026-02-25T18:53:20Z" }, { - "additions": 413, - "author": "IlyasMoutawwakil", + "additions": 5882, + "author": "SunMarc", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026", + "body_excerpt": "# What does this PR do? \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], - "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44100", - "created_at": "2026-02-17T17:10:36Z", - "deletions": 3, + "cluster_role": null, + "comments_count": 5, + "conversation_url": "https://github.com/huggingface/transformers/pull/44127", + "created_at": "2026-02-18T10:41:48Z", + "deletions": 8, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44100/files", - "html_url": "https://github.com/huggingface/transformers/pull/44100", + "files_url": "https://github.com/huggingface/transformers/pull/44127/files", + "html_url": "https://github.com/huggingface/transformers/pull/44127", "labels": [], "merged": true, - "number": 44100, + "number": 44127, "review_comments_count": 0, "state": "closed", - "title": "Fix percentage formatting in help messages for gradient checkpointing, Liger Kernel, and empty cache steps", - "updated_at": "2026-02-20T09:57:51Z" + "title": "AutoTokenizer ignores config when model_type is None", + "updated_at": "2026-02-18T14:47:52Z" }, { - "additions": 2, - "author": "qgallouedec", + "additions": 17, + "author": "Cyrilvallez", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do?