diff --git "a/data/prs.json" "b/data/prs.json" --- "a/data/prs.json" +++ "b/data/prs.json" @@ -1,27634 +1,27466 @@ [ { - "additions": 71, - "author": "zucchini-nlp", + "additions": 115, + "author": "IlyasMoutawwakil", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Credits to @xenova , adds an example code since it is more complicated that text LLMs. Beginner users might not know and an example is pretty much useful for them", - "changed_files": 2, + "body_excerpt": "# What does this PR do? Fixes # (issue) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by c\u2026", - "changed_files": 8, + "body_excerpt": "# What does this PR do? Fixes `attn_implementation=\"flash_attention_3\"` which is currently broken for the most common FA3 install method \u2014 the hopper wheel built from `flash-attention/hopper/`. **Three issues fixed:** 1. **`is_flash_attn_3\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/45350", - "created_at": "2026-04-09T17:46:37Z", - "deletions": 0, - "draft": true, - "files_url": "https://github.com/huggingface/transformers/pull/45350/files", - "html_url": "https://github.com/huggingface/transformers/pull/45350", + "conversation_url": "https://github.com/huggingface/transformers/pull/45387", + "created_at": "2026-04-12T17:02:14Z", + "deletions": 15, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/45387/files", + "html_url": "https://github.com/huggingface/transformers/pull/45387", "labels": [], "merged": false, - "number": 45350, - "review_comments_count": 0, - "state": "open", - "title": "WIP: Add support for Granite4VisionForConditionalGeneration", - "updated_at": "2026-04-10T12:34:50Z" + "number": 45387, + "review_comments_count": 4, + "state": "closed", + "title": "Fix flash_attention_3 detection and import for hopper wheel installs", + "updated_at": "2026-04-13T16:14:21Z" }, { - "additions": 90, - "author": "florian6973", + "additions": 26, + "author": "UsamaKenway", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "# What does this PR do? Fixes #45305 Add a regression test in `TrainerGradientAccumulationTest` to avoid passing the GAS value to Accelerate by mistake Description: I force the value of the `num_steps` parameter to be 1, and the regression\u2026", + "body_excerpt": "Optimizes memory usage when loading GGUF models by performing dtype casting immediately after dequantization. While I was adding the support for Gemma4 in this PR #45296, i noticed this issue that the GGUF tensors are dequantized to `float\u2026", "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/45349", - "created_at": "2026-04-09T17:24:39Z", - "deletions": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/45386", + "created_at": "2026-04-12T13:17:17Z", + "deletions": 13, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45349/files", - "html_url": "https://github.com/huggingface/transformers/pull/45349", + "files_url": "https://github.com/huggingface/transformers/pull/45386/files", + "html_url": "https://github.com/huggingface/transformers/pull/45386", "labels": [], "merged": false, - "number": 45349, - "review_comments_count": 6, + "number": 45386, + "review_comments_count": 2, "state": "open", - "title": "Fix #45305 + add regression test GAS", - "updated_at": "2026-04-11T18:42:18Z" + "title": "[GGUF] Reduce peak RAM usage by casting dequantized tensors early during load", + "updated_at": "2026-04-13T22:51:25Z" }, { - "additions": 50, - "author": "qgallouedec", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Fixes #45290 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review and r\u2026", - "changed_files": 5, + "additions": 15, + "author": "songyuc", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## What does this PR do? Loading `openai/clip-vit-base-patch32` currently reports the following keys as unexpected: - `text_model.embeddings.position_ids` - `vision_model.embeddings.position_ids` In the current CLIP implementation, these b\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/45348", - "created_at": "2026-04-09T15:59:07Z", - "deletions": 19, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/45385", + "created_at": "2026-04-12T12:53:49Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45348/files", - "html_url": "https://github.com/huggingface/transformers/pull/45348", + "files_url": "https://github.com/huggingface/transformers/pull/45385/files", + "html_url": "https://github.com/huggingface/transformers/pull/45385", "labels": [], "merged": false, - "number": 45348, - "review_comments_count": 5, - "state": "open", - "title": "Fix apply_chat_template crash on tool_call messages without content", - "updated_at": "2026-04-11T01:40:44Z" - }, - { - "additions": 35, - "author": "Cyrilvallez", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? As per the title. `accelerate` destroys the dict otherwise, if it's not BOTH passed as kwarg AND part of `_skip_keys_device_placement`.......... `per_layer_input` needs to stay as a positional arg, for gradient chec\u2026", - "changed_files": 3, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/45347", - "created_at": "2026-04-09T15:31:34Z", - "deletions": 6, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45347/files", - "html_url": "https://github.com/huggingface/transformers/pull/45347", - "labels": [], - "merged": true, - "number": 45347, + "number": 45385, "review_comments_count": 0, - "state": "closed", - "title": "[gemma4] Fix device map auto", - "updated_at": "2026-04-09T15:45:15Z" + "state": "open", + "title": "Ignore CLIP position_ids in unexpected key loading report", + "updated_at": "2026-04-12T15:05:10Z" }, { - "additions": 46, - "author": "ionut-anghelina", + "additions": 25, + "author": "GitGlimpse895", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": null, - "changed_files": 15, + "body_excerpt": "# What does this PR do? `StoppingCriteriaList.__call__` previously evaluated every registered criterion unconditionally on every generation step, even after `is_done` was already `True` for all sequences in the batch. This adds a single `i\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/45346", - "created_at": "2026-04-09T14:48:28Z", - "deletions": 30, + "conversation_url": "https://github.com/huggingface/transformers/pull/45384", + "created_at": "2026-04-12T10:14:58Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45346/files", - "html_url": "https://github.com/huggingface/transformers/pull/45346", + "files_url": "https://github.com/huggingface/transformers/pull/45384/files", + "html_url": "https://github.com/huggingface/transformers/pull/45384", "labels": [], "merged": false, - "number": 45346, - "review_comments_count": 1, + "number": 45384, + "review_comments_count": 0, "state": "open", - "title": "Fix Double Application of Softmax for Router Logits in MoE models", - "updated_at": "2026-04-09T15:50:47Z" + "title": "generation/stopping_criteria: short-circuit StoppingCriteriaList when all sequences are done", + "updated_at": "2026-04-14T03:23:23Z" }, { - "additions": 30, - "author": "ansley", + "additions": 6, + "author": "Aftabbs", "author_association": "NONE", - "body_excerpt": "The `transformers` V5 \"rm slow tokenizers\" refactor (\\#40936) aliased `LlamaTokenizerFast` to `LlamaTokenizer`, whose `__init__` unconditionally installs a SentencePiece Metaspace pre-tokenizer. This is correct for classic Llama/Llama-2 mo\u2026", - "changed_files": 2, + "body_excerpt": "## Description Fixes #45290. `apply_chat_template(tokenize=True)` raises `KeyError: 'content'` when a conversation contains an assistant message that has `tool_calls` but no `content` key: ```python processor.apply_chat_template( [[ {\"role\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 6, - "conversation_url": "https://github.com/huggingface/transformers/pull/45345", - "created_at": "2026-04-09T14:31:40Z", - "deletions": 14, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/45383", + "created_at": "2026-04-12T08:48:26Z", + "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45345/files", - "html_url": "https://github.com/huggingface/transformers/pull/45345", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/45383/files", + "html_url": "https://github.com/huggingface/transformers/pull/45383", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 45345, + "number": 45383, "review_comments_count": 0, "state": "closed", - "title": "Fix ByteLevel-BPE tokenizers silently breaking in `LlamaTokenizer`", - "updated_at": "2026-04-10T12:45:24Z" + "title": "fix(processing): guard message content access in apply_chat_template", + "updated_at": "2026-04-13T09:34:50Z" }, { - "additions": 6, - "author": "tarekziade", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Simple hook to display test duration. This will append inline duration per test during the run, example: ``` tests/utils/test_configuration_utils.py::ConfigPushToHubTester::test_push_to_hub [gw1] [ 90%] PASSED tests\u2026", - "changed_files": 1, + "additions": 334, + "author": "zFlux", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## Summary Adds conversion from `facebook/audiogen-medium` (AudioCraft Hub layout: `state_dict.bin` + `compression_state_dict.bin`) to `MusicgenForConditionalGeneration`. - `convert_audiogen_transformers.py` \u2014 reuses `rename_state_dict` fr\u2026", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/45344", - "created_at": "2026-04-09T14:22:46Z", + "conversation_url": "https://github.com/huggingface/transformers/pull/45382", + "created_at": "2026-04-12T03:38:01Z", "deletions": 0, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45344/files", - "html_url": "https://github.com/huggingface/transformers/pull/45344", + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/45382/files", + "html_url": "https://github.com/huggingface/transformers/pull/45382", "labels": [], - "merged": true, - "number": 45344, + "merged": false, + "number": 45382, "review_comments_count": 0, - "state": "closed", - "title": "refactor: display test duration", - "updated_at": "2026-04-09T15:19:26Z" + "state": "open", + "title": "Add AudioGen (AudioCraft) to MusicGen conversion scripts", + "updated_at": "2026-04-12T03:39:00Z" }, { - "additions": 8, - "author": "Cyrilvallez", - "author_association": "MEMBER", - "body_excerpt": null, - "changed_files": 1, + "additions": 11, + "author": "Brianzhengca", + "author_association": "NONE", + "body_excerpt": "# What does this PR do? Fixes # (issue) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by c\u2026", + "changed_files": 8, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/45309", - "created_at": "2026-04-08T08:40:08Z", - "deletions": 23, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45309/files", - "html_url": "https://github.com/huggingface/transformers/pull/45309", - "labels": [ - "Code agent slop" - ], + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/45350", + "created_at": "2026-04-09T17:46:37Z", + "deletions": 0, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/45350/files", + "html_url": "https://github.com/huggingface/transformers/pull/45350", + "labels": [], "merged": false, - "number": 45309, + "number": 45350, "review_comments_count": 0, - "state": "closed", - "title": "Fix KeyError in apply_chat_template when message has no content (#45290)", - "updated_at": "2026-04-08T11:30:37Z" + "state": "open", + "title": "WIP: Add support for Granite4VisionForConditionalGeneration", + "updated_at": "2026-04-10T12:34:50Z" }, { - "additions": 10, - "author": "juliabush", - "author_association": "NONE", - "body_excerpt": "## What does this PR do? Fixes #29942 Flash Attention 2 inference equivalence tests for Whisper can fail due to higher numerical variance compared to the eager attention implementation. This PR increases the tolerance (`atol`, `rtol`) spec\u2026", - "changed_files": 1, + "additions": 90, + "author": "florian6973", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? Fixes #45305 Add a regression test in `TrainerGradientAccumulationTest` to avoid passing the GAS value to Accelerate by mistake Description: I force the value of the `num_steps` parameter to be 1, and the regression\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/45303", - "created_at": "2026-04-07T21:37:00Z", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/45349", + "created_at": "2026-04-09T17:24:39Z", "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45303/files", - "html_url": "https://github.com/huggingface/transformers/pull/45303", + "files_url": "https://github.com/huggingface/transformers/pull/45349/files", + "html_url": "https://github.com/huggingface/transformers/pull/45349", "labels": [ - "Code agent slop" + "for patch" ], - "merged": false, - "number": 45303, - "review_comments_count": 0, + "merged": true, + "number": 45349, + "review_comments_count": 6, "state": "closed", - "title": "Fix FA2 inference equivalence failures for Whisper (closes #29942)", - "updated_at": "2026-04-08T14:42:36Z" + "title": "Fix #45305 + add regression test GAS", + "updated_at": "2026-04-13T14:41:43Z" }, { - "additions": 7, - "author": "jagwar", + "additions": 50, + "author": "qgallouedec", "author_association": "MEMBER", - "body_excerpt": "## Security Fix Fixes a trust check bypass in `trl-ci-bot.yml` that allowed any GitHub user to trigger TRL CI on self-hosted GPU runners by commenting `/trl-ci` on any PR. ### The bug The \"Ignore untrusted commenter\" step used `exit 0`, wh\u2026", - "changed_files": 1, + "body_excerpt": "# What does this PR do? Fixes #45290 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review and r\u2026", + "changed_files": 5, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/45302", - "created_at": "2026-04-07T21:35:38Z", - "deletions": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/45348", + "created_at": "2026-04-09T15:59:07Z", + "deletions": 19, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/45302/files", - "html_url": "https://github.com/huggingface/transformers/pull/45302", + "files_url": "https://github.com/huggingface/transformers/pull/45348/files", + "html_url": "https://github.com/huggingface/transformers/pull/45348", "labels": [], "merged": true, - "number": 45302, - "review_comments_count": 0, + "number": 45348, + "review_comments_count": 7, "state": "closed", - "title": "fix(security): prevent untrusted users from triggering TRL CI dispatch", - "updated_at": "2026-04-07T21:59:38Z" + "title": "Fix `apply_chat_template` crash on `tool_call` messages without content", + "updated_at": "2026-04-13T19:44:38Z" }, { - "additions": 0, - "author": "sahildando", - "author_association": "NONE", - "body_excerpt": "# What does this PR do? save locally --> local locally) ```\u2026", - "changed_files": 2, + "body_excerpt": "# What does this PR do? This patch - adds a simple cache to the model linter so we skip files that did not change and were valid - reworks `Makefile` targets", + "changed_files": 6, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44730", - "created_at": "2026-03-15T20:44:32Z", - "deletions": 4, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44790", + "created_at": "2026-03-17T08:54:47Z", + "deletions": 19, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44730/files", - "html_url": "https://github.com/huggingface/transformers/pull/44730", + "files_url": "https://github.com/huggingface/transformers/pull/44790/files", + "html_url": "https://github.com/huggingface/transformers/pull/44790", "labels": [], "merged": true, - "number": 44730, - "review_comments_count": 6, + "number": 44790, + "review_comments_count": 1, "state": "closed", - "title": "Fix `mlcd` auto config/model/mapping issues", - "updated_at": "2026-03-16T12:12:30Z" + "title": "feat: added cache to the model linter", + "updated_at": "2026-03-24T15:28:29Z" }, { - "additions": 214, - "author": "xenova", + "additions": 21, + "author": "ydshieh", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? This PR introduces a helper utility function, `int_div_ceil`, which performs `math.ceil(a / b)` for non-negative integer operands. This is necessary as the current approach is both error-prone and imprecise (especia\u2026", - "changed_files": 58, + "body_excerpt": "# What does this PR do? Some configs from the hub have different types.", + "changed_files": 7, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44729", - "created_at": "2026-03-15T20:29:38Z", - "deletions": 225, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44789", + "created_at": "2026-03-17T08:41:30Z", + "deletions": 21, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44729/files", - "html_url": "https://github.com/huggingface/transformers/pull/44729", + "files_url": "https://github.com/huggingface/transformers/pull/44789/files", + "html_url": "https://github.com/huggingface/transformers/pull/44789", "labels": [], - "merged": false, - "number": 44729, - "review_comments_count": 0, - "state": "open", - "title": "Avoid floating point math for ceil operations", - "updated_at": "2026-03-15T20:49:34Z" + "merged": true, + "number": 44789, + "review_comments_count": 5, + "state": "closed", + "title": "Fix config loading issues (type issues)", + "updated_at": "2026-03-17T09:44:50Z" }, { - "additions": 88, - "author": "ajmeese7", - "author_association": "NONE", - "body_excerpt": "# What does this PR do? Fixes a GPU memory leak in `Bnb4bitQuantize.convert()` where float16 source tensors are never freed during 4-bit quantized model loading via `from_pretrained`, causing OOM on models whose float16 size exceeds GPU VR\u2026", - "changed_files": 2, + "additions": 0, + "author": "BillionClaw", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "The pipeline() docstring included an example using the 'question-answering' task, but this task is not in SUPPORTED_TASKS and will raise an error when used. Remove this outdated example to avoid confusing users following the documentation.\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44728", - "created_at": "2026-03-15T19:56:44Z", - "deletions": 1, + "comments_count": 9, + "conversation_url": "https://github.com/huggingface/transformers/pull/44788", + "created_at": "2026-03-17T08:38:25Z", + "deletions": 5, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44728/files", - "html_url": "https://github.com/huggingface/transformers/pull/44728", + "files_url": "https://github.com/huggingface/transformers/pull/44788/files", + "html_url": "https://github.com/huggingface/transformers/pull/44788", "labels": [], "merged": false, - "number": 44728, + "number": 44788, "review_comments_count": 0, "state": "closed", - "title": "Fix float16 memory leak during 4-bit quantized model loading", - "updated_at": "2026-03-16T20:53:54Z" + "title": "docs(pipelines): remove outdated question-answering example", + "updated_at": "2026-03-23T17:19:33Z" }, { - "additions": 202, - "author": "LincolnBurrows2017", + "additions": 4, + "author": "BillionClaw", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Fixed issue where kwargs like force_download, proxies, token were not being passed to cached_file function.", - "changed_files": 11, + "body_excerpt": "The question-answering pipeline was removed in v5.0.0 per MIGRATION_GUIDE_V5.md, but the non-English task guides still referenced it. This updates the Arabic, Chinese, Japanese, and Korean question answering task guides to remove usage of\u2026", + "changed_files": 4, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44727", - "created_at": "2026-03-15T19:41:24Z", - "deletions": 33, + "conversation_url": "https://github.com/huggingface/transformers/pull/44787", + "created_at": "2026-03-17T08:24:09Z", + "deletions": 66, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44727/files", - "html_url": "https://github.com/huggingface/transformers/pull/44727", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44727, + "files_url": "https://github.com/huggingface/transformers/pull/44787/files", + "html_url": "https://github.com/huggingface/transformers/pull/44787", + "labels": [], + "merged": true, + "number": 44787, "review_comments_count": 0, "state": "closed", - "title": "fix: AutoProcessor.from_pretrained not passing kwargs to cached_file", - "updated_at": "2026-03-18T13:15:46Z" + "title": "docs(tasks): remove references to removed question-answering pipeline", + "updated_at": "2026-03-17T16:23:50Z" }, { - "additions": 198, - "author": "LincolnBurrows2017", + "additions": 25, + "author": "BillionClaw", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Replaced bare except clause with except Exception in _safe_convert_tensor function to follow Python best practices (PEP 8).", - "changed_files": 10, + "body_excerpt": "AMD Strix Halo APUs (gfx1151) experience OOM errors when loading large models via safetensors mmap due to unified memory architecture issues. This fix detects Strix Halo GPUs by checking the GPU architecture name (gfx1151) and forces a CPU\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44725", - "created_at": "2026-03-15T17:41:18Z", - "deletions": 29, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44786", + "created_at": "2026-03-17T08:17:32Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44725/files", - "html_url": "https://github.com/huggingface/transformers/pull/44725", - "labels": [ - "Code agent slop" - ], + "files_url": "https://github.com/huggingface/transformers/pull/44786/files", + "html_url": "https://github.com/huggingface/transformers/pull/44786", + "labels": [], "merged": false, - "number": 44725, + "number": 44786, "review_comments_count": 0, "state": "closed", - "title": "fix: replace bare except with Exception in Fuyu image processing", - "updated_at": "2026-03-18T13:16:22Z" - }, - { - "additions": 6, - "author": "ydshieh", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? TO be explained.", - "changed_files": 5, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44724", - "created_at": "2026-03-15T17:14:12Z", - "deletions": 5, - "draft": true, - "files_url": "https://github.com/huggingface/transformers/pull/44724/files", - "html_url": "https://github.com/huggingface/transformers/pull/44724", - "labels": [], - "merged": false, - "number": 44724, - "review_comments_count": 1, - "state": "open", - "title": "Fix some missing / incorrect entries in auto files", - "updated_at": "2026-03-16T09:59:56Z" + "title": "fix(core_model_loading): disable mmap on Strix Halo to avoid OOM", + "updated_at": "2026-03-17T10:29:44Z" }, { - "additions": 12, - "author": "aashirpersonal", - "author_association": "NONE", - "body_excerpt": "## Summary This PR fixes #44716 by exposing and forwarding `interpolate_pos_encoding` through the Pixio embedding/model call chain so the option is actually usable from `PixioModel.forward()`. ### Changes - Added `interpolate_pos_encoding:\u2026", - "changed_files": 2, + "additions": 307, + "author": "BillionClaw", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "AMD Strix Halo APUs (e.g., Radeon 8060S) have issues with mmap-based tensor loading from safetensors, causing out-of-memory errors even when sufficient memory is available. This fix: - Adds `is_strix_halo()` helper to detect Strix Halo GPU\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44723", - "created_at": "2026-03-15T16:52:03Z", - "deletions": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44785", + "created_at": "2026-03-17T06:55:31Z", + "deletions": 83, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44723/files", - "html_url": "https://github.com/huggingface/transformers/pull/44723", - "labels": [ - "Code agent slop" - ], + "files_url": "https://github.com/huggingface/transformers/pull/44785/files", + "html_url": "https://github.com/huggingface/transformers/pull/44785", + "labels": [], "merged": false, - "number": 44723, + "number": 44785, "review_comments_count": 0, "state": "closed", - "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel", - "updated_at": "2026-03-18T15:05:52Z" + "title": "fix(model_loading): Disable mmap on Strix Halo to avoid OOM", + "updated_at": "2026-03-17T10:28:06Z" }, { - "additions": 38, - "author": "chandan11248", + "additions": 2, + "author": "BillionClaw", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR do? Migrates the GPT-J model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as described in #43979. ### Changes - Added `_can_record_outputs` to `GPTJPreTrained\u2026", - "changed_files": 2, - "cluster_id": "cluster-43979-21", - "cluster_ids": [ - "cluster-43979-21" - ], - "cluster_role": "member", + "body_excerpt": "This PR fixes the DeepSeek tokenizer issue where spaces were lost during decoding in Transformers v5. ## Problem DeepSeek V2 and V3 models use SentencePiece tokenization (like Llama) but were falling back to the generic TokenizersBackend i\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44722", - "created_at": "2026-03-15T15:33:25Z", - "deletions": 110, + "conversation_url": "https://github.com/huggingface/transformers/pull/44783", + "created_at": "2026-03-17T05:58:54Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44722/files", - "html_url": "https://github.com/huggingface/transformers/pull/44722", + "files_url": "https://github.com/huggingface/transformers/pull/44783/files", + "html_url": "https://github.com/huggingface/transformers/pull/44783", "labels": [], "merged": false, - "number": 44722, + "number": 44783, "review_comments_count": 0, "state": "open", - "title": "Refactor gptj output tracing to use standardized decorators", - "updated_at": "2026-03-19T18:12:59Z" + "title": "fix(auto): Map deepseek_v2 and deepseek_v3 to LlamaTokenizer", + "updated_at": "2026-03-17T11:12:52Z" }, { - "additions": 4, - "author": "rsmed31", - "author_association": "NONE", - "body_excerpt": "## Summary Fixes #44716 `PixioPatchEmbeddings.forward` already accepted `interpolate_pos_encoding` but it was silently dropped \u2014 never passed from `PixioEmbeddings.forward` or `PixioModel.forward`, making the parameter effectively unusable\u2026", + "additions": 6, + "author": "JiwaniZakir", + "author_association": "CONTRIBUTOR", + "body_excerpt": "Fixes #44737 `XLNetModel.relative_positional_encoding` was creating all `torch.arange` tensors on CPU by default, then calling `.to(output_h.device)` at the call site to move them. Adds a `device` parameter to `relative_positional_encoding\u2026", "changed_files": 1, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44718", - "created_at": "2026-03-14T23:57:14Z", - "deletions": 3, + "cluster_id": "cluster-44737-6", + "cluster_ids": [ + "cluster-44737-6" + ], + "cluster_role": "canonical", + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44782", + "created_at": "2026-03-17T05:11:36Z", + "deletions": 7, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44718/files", - "html_url": "https://github.com/huggingface/transformers/pull/44718", + "files_url": "https://github.com/huggingface/transformers/pull/44782/files", + "html_url": "https://github.com/huggingface/transformers/pull/44782", "labels": [], - "merged": false, - "number": 44718, + "merged": true, + "number": 44782, "review_comments_count": 0, "state": "closed", - "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel", - "updated_at": "2026-03-15T17:58:58Z" + "title": "fix: XLNet: relative_positional_encoding computes on CPU every forward", + "updated_at": "2026-03-19T13:30:48Z" }, { - "additions": 15, - "author": "ydshieh", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? As discussed internally, some component model classes didn't specify the correct config classes. This PR fixes them (those I could found - because the tiny model creation script fails due to those mistakes).", - "changed_files": 7, + "additions": 5, + "author": "bensons", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "# What does this PR do? Some model repos provide `extra_special_tokens` as a list in their tokenizer_config.json, which caused an `AttributeError: 'list' object has no attribute 'keys'`. This converts list inputs to a dict mapping each tok\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44715", - "created_at": "2026-03-14T21:11:52Z", - "deletions": 2, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44781", + "created_at": "2026-03-17T04:59:02Z", + "deletions": 2849, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44715/files", - "html_url": "https://github.com/huggingface/transformers/pull/44715", + "files_url": "https://github.com/huggingface/transformers/pull/44781/files", + "html_url": "https://github.com/huggingface/transformers/pull/44781", "labels": [], - "merged": true, - "number": 44715, + "merged": false, + "number": 44781, "review_comments_count": 0, - "state": "closed", - "title": "Fix missing / incorrect `config` class in some model class definitions", - "updated_at": "2026-03-15T11:19:51Z" + "state": "open", + "title": "Fix `_set_model_specific_special_tokens` to accept list-format `extra_special_tokens`", + "updated_at": "2026-03-27T23:19:21Z" }, { - "additions": 181, + "additions": 145, "author": "LincolnBurrows2017", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating from core config to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but `text_config` still has default\u2026", + "body_excerpt": "Fixed logic error in is_tiktoken_available function. The original code `return with_blobfile and _is_package_available(\"blobfile\")[0] or True` would always return True due to operator precedence.", "changed_files": 8, - "cluster_id": "cluster-44625-9", - "cluster_ids": [ - "cluster-44625-9" - ], - "cluster_role": "member", - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44714", - "created_at": "2026-03-14T20:42:46Z", - "deletions": 26, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44778", + "created_at": "2026-03-16T23:41:29Z", + "deletions": 28, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44714/files", - "html_url": "https://github.com/huggingface/transformers/pull/44714", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44778/files", + "html_url": "https://github.com/huggingface/transformers/pull/44778", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44714, + "number": 44778, "review_comments_count": 0, "state": "closed", - "title": "fix: propagate num_labels to text_config for Qwen models", - "updated_at": "2026-03-18T12:56:27Z" + "title": "fix: correct logic error in is_tiktoken_available function", + "updated_at": "2026-03-18T13:15:37Z" }, { - "additions": 15, - "author": "kulkarni-rohan", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Applies the output tracing refactor to ColQwen2ForRetrieval as part of the broader effort tracked in issue #43979 to modernize output handling across all models in the library. Changes in both modular_colqwen2.py and modeling_colqwen2.py:\u2026", - "changed_files": 2, + "additions": 35, + "author": "stevhliu", + "author_association": "MEMBER", + "body_excerpt": "adds docs for #43705 (enable bidirectional attention for decoder-only models)", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44713", - "created_at": "2026-03-14T20:20:14Z", - "deletions": 28, + "conversation_url": "https://github.com/huggingface/transformers/pull/44777", + "created_at": "2026-03-16T21:58:40Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44713/files", - "html_url": "https://github.com/huggingface/transformers/pull/44713", + "files_url": "https://github.com/huggingface/transformers/pull/44777/files", + "html_url": "https://github.com/huggingface/transformers/pull/44777", "labels": [], - "merged": false, - "number": 44713, - "review_comments_count": 0, - "state": "open", - "title": "[ColQwen2] Refactor output tracing (issue #43979)", - "updated_at": "2026-03-14T20:21:24Z" + "merged": true, + "number": 44777, + "review_comments_count": 1, + "state": "closed", + "title": "[docs] is_causal feature", + "updated_at": "2026-03-17T19:50:43Z" }, { - "additions": 2, - "author": "ydshieh", + "additions": 0, + "author": "stevhliu", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? torch 2.11 is going to be released soon, but we still use 2.9. Let's update it to 2.10 so at least a run with torch 2.10, before we update to torch 2.11 later.", + "body_excerpt": "the doc-builder is breaking because it can't find `Mistral4ForQuestionAnswering`, which looks like it doesn't exist", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44712", - "created_at": "2026-03-14T20:18:01Z", - "deletions": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44776", + "created_at": "2026-03-16T20:43:33Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44712/files", - "html_url": "https://github.com/huggingface/transformers/pull/44712", + "files_url": "https://github.com/huggingface/transformers/pull/44776/files", + "html_url": "https://github.com/huggingface/transformers/pull/44776", "labels": [], "merged": true, - "number": 44712, - "review_comments_count": 0, - "state": "closed", - "title": "Update Nvidia CI docker file to use torch 2.10", - "updated_at": "2026-03-14T20:29:04Z" - }, - { - "additions": 339, - "author": "anuq", - "author_association": "NONE", - "body_excerpt": "## What does this PR do? Fixes #35141. When `tie_word_embeddings=False`, calling `resize_token_embeddings()` creates a new `nn.Linear` for the LM head via `_get_resized_lm_head()`. The new module's weight and bias tensors do **not** carry\u2026", - "changed_files": 4, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44711", - "created_at": "2026-03-14T19:21:21Z", - "deletions": 205, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44711/files", - "html_url": "https://github.com/huggingface/transformers/pull/44711", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44711, - "review_comments_count": 0, - "state": "closed", - "title": "fix: mark new lm_head params as `_is_hf_initialized` after `resize_token_embeddings`", - "updated_at": "2026-03-20T13:36:58Z" - }, - { - "additions": 12, - "author": "he-yufeng", - "author_association": "CONTRIBUTOR", - "body_excerpt": "## What does this PR do? Fixes `AutoProcessor.from_pretrained` silently dropping hub kwargs like `force_download`, `cache_dir`, `token`, `revision`, etc. ### The bug The existing code on line ~300 filters kwargs using `inspect.signature(ca\u2026", - "changed_files": 1, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44710", - "created_at": "2026-03-14T18:33:53Z", - "deletions": 2, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44710/files", - "html_url": "https://github.com/huggingface/transformers/pull/44710", - "labels": [], - "merged": true, - "number": 44710, + "number": 44776, "review_comments_count": 0, "state": "closed", - "title": "Fix AutoProcessor.from_pretrained silently dropping hub kwargs", - "updated_at": "2026-03-25T18:13:14Z" + "title": "[fix] mistral 4 docs", + "updated_at": "2026-03-16T21:11:29Z" }, { - "additions": 6778, - "author": "LucasMa2025", - "author_association": "FIRST_TIMER", - "body_excerpt": "# \ud83c\udf9b\ufe0f Add Configurable Generation Scheduler and State Machine for `generate()` ## Summary This PR introduces a **fully optional, zero-intrusion** Generation Scheduler (`GenerationScheduler`) and explicit state machine (`GenerationStateMachi\u2026", - "changed_files": 15, + "additions": 177, + "author": "stevhliu", + "author_association": "MEMBER", + "body_excerpt": "refactors the current [Parallelism methods](https://huggingface.co/docs/transformers/main/en/perf_train_gpu_many#zero-data-parallelism-pipeline-parallelism-and-model-parallelism-3d-parallelism) doc to: - focus on practical examples of comb\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/44708", - "created_at": "2026-03-14T17:13:34Z", - "deletions": 7, + "conversation_url": "https://github.com/huggingface/transformers/pull/44775", + "created_at": "2026-03-16T20:23:29Z", + "deletions": 109, "draft": true, - "files_url": "https://github.com/huggingface/transformers/pull/44708/files", - "html_url": "https://github.com/huggingface/transformers/pull/44708", + "files_url": "https://github.com/huggingface/transformers/pull/44775/files", + "html_url": "https://github.com/huggingface/transformers/pull/44775", "labels": [], "merged": false, - "number": 44708, + "number": 44775, "review_comments_count": 0, - "state": "closed", - "title": "Add Configurable Generation Scheduler and State Machine for `generate()`", - "updated_at": "2026-03-14T19:19:11Z" + "state": "open", + "title": "[docs] n-d parallelism", + "updated_at": "2026-03-16T20:28:48Z" }, { - "additions": 3, - "author": "saivedant169", - "author_association": "NONE", - "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `MptForCausalLM.forward()` and `MptModel.forward()`, bringing MPT in line with other CausalLM models. Same rationale as the Bloom PR (#44706) \u2014 M\u2026", - "changed_files": 1, + "additions": 0, + "author": "ydshieh", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? Remove `is_causal` from `EuroBertConfig`", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44707", - "created_at": "2026-03-14T17:12:16Z", - "deletions": 0, + "comments_count": 5, + "conversation_url": "https://github.com/huggingface/transformers/pull/44774", + "created_at": "2026-03-16T18:56:19Z", + "deletions": 6, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44707/files", - "html_url": "https://github.com/huggingface/transformers/pull/44707", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44707, + "files_url": "https://github.com/huggingface/transformers/pull/44774/files", + "html_url": "https://github.com/huggingface/transformers/pull/44774", + "labels": [], + "merged": true, + "number": 44774, "review_comments_count": 0, "state": "closed", - "title": "Add position_ids to MptForCausalLM forward pass", - "updated_at": "2026-03-18T13:39:36Z" + "title": "Remove `is_causal` from `EuroBertConfig`", + "updated_at": "2026-03-17T09:33:21Z" }, { "additions": 3, - "author": "saivedant169", - "author_association": "NONE", - "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `BloomForCausalLM.forward()` and `BloomModel.forward()`, bringing Bloom in line with other CausalLM models like Llama, Falcon, Gemma, and Mistral\u2026", - "changed_files": 1, + "author": "githubnemo", + "author_association": "MEMBER", + "body_excerpt": "The links to the quantization offloading were outdated and 4-bit quantization also supports offloading which should be mentioned. cc @SunMarc", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44706", - "created_at": "2026-03-14T17:09:11Z", - "deletions": 0, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/44772", + "created_at": "2026-03-16T18:46:13Z", + "deletions": 3, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44706/files", - "html_url": "https://github.com/huggingface/transformers/pull/44706", - "labels": [ - "Code agent slop" - ], + "files_url": "https://github.com/huggingface/transformers/pull/44772/files", + "html_url": "https://github.com/huggingface/transformers/pull/44772", + "labels": [], "merged": false, - "number": 44706, + "number": 44772, "review_comments_count": 0, - "state": "closed", - "title": "Add position_ids to BloomForCausalLM forward pass", - "updated_at": "2026-03-18T13:39:51Z" + "state": "open", + "title": "bitsandbytes: Update links and docs", + "updated_at": "2026-03-17T15:57:56Z" }, { - "additions": 14, - "author": "saivedant169", - "author_association": "NONE", - "body_excerpt": "Fixes part of #32937 ## What does this PR do? RoFormer introduced rotary position embeddings, but its `ForCausalLM` forward method doesn't accept `position_ids` \u2014 which means callers can't specify custom positions for packed sequences or f\u2026", + "additions": 2, + "author": "ydshieh", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? wtf", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44705", - "created_at": "2026-03-14T16:48:06Z", + "conversation_url": "https://github.com/huggingface/transformers/pull/44771", + "created_at": "2026-03-16T18:45:11Z", "deletions": 1, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44705/files", - "html_url": "https://github.com/huggingface/transformers/pull/44705", - "labels": [ - "Code agent slop" - ], + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/44771/files", + "html_url": "https://github.com/huggingface/transformers/pull/44771", + "labels": [], "merged": false, - "number": 44705, + "number": 44771, "review_comments_count": 0, - "state": "closed", - "title": "Add position_ids to RoFormerForCausalLM forward pass", - "updated_at": "2026-03-18T13:40:05Z" + "state": "open", + "title": "wtf", + "updated_at": "2026-03-16T18:56:00Z" }, { - "additions": 26, - "author": "vasqu", + "additions": 203, + "author": "zucchini-nlp", "author_association": "MEMBER", - "body_excerpt": "As per title, it seems that the `cute` subfolder can be even distributed if you only install FA2 which implies something wrong. Now we check under the (normalized) distribution names", - "changed_files": 2, + "body_excerpt": "# What does this PR do? Fix tests failing because of `strict` type validation and decorate two missing configs, Nemotron and VibeVoice", + "changed_files": 12, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44703", - "created_at": "2026-03-14T14:46:02Z", - "deletions": 10, + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44770", + "created_at": "2026-03-16T18:44:03Z", + "deletions": 268, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44703/files", - "html_url": "https://github.com/huggingface/transformers/pull/44703", + "files_url": "https://github.com/huggingface/transformers/pull/44770/files", + "html_url": "https://github.com/huggingface/transformers/pull/44770", "labels": [], "merged": true, - "number": 44703, + "number": 44770, "review_comments_count": 1, "state": "closed", - "title": "[`FA`] Fix fa detection", - "updated_at": "2026-03-14T17:19:07Z" + "title": "Fix configs with `@strict`", + "updated_at": "2026-03-17T15:39:43Z" }, { - "additions": 148, + "additions": 145, "author": "LincolnBurrows2017", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR fix? The `rms_norm_eps` parameter in `MistralConfig` was incorrectly typed as `int | None` but defaults to `1e-6` which is a float. This parameter is passed to `MistralRMSNorm` which expects `eps: float`. ### Bug Detai\u2026", + "body_excerpt": "## Summary The `is_batched_video()` and `convert_pil_frames_to_video()` functions in `src/transformers/video_utils.py` were accessing `videos[0]` without first checking if the list is empty, causing `IndexError` when empty lists are passed\u2026", "changed_files": 8, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44702", - "created_at": "2026-03-14T14:41:15Z", - "deletions": 25, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44769", + "created_at": "2026-03-16T18:40:07Z", + "deletions": 28, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44702/files", - "html_url": "https://github.com/huggingface/transformers/pull/44702", + "files_url": "https://github.com/huggingface/transformers/pull/44769/files", + "html_url": "https://github.com/huggingface/transformers/pull/44769", "labels": [ "Code agent slop" ], "merged": false, - "number": 44702, - "review_comments_count": 0, - "state": "closed", - "title": "fix: Correct rms_norm_eps type hint from int to float in MistralConfig", - "updated_at": "2026-03-18T13:00:12Z" - }, - { - "additions": 219, - "author": "hmellor", - "author_association": "MEMBER", - "body_excerpt": "These models have `base_model_pp_plan`s but currently do not work because the base model's forward pass depends on all the `layers` being `Qwen2VLDecoderLayer`. i.e. if one of the layers is removed/replaced with `Identity`, `decoder_layer.\u2026", - "changed_files": 52, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44699", - "created_at": "2026-03-14T11:44:24Z", - "deletions": 148, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44699/files", - "html_url": "https://github.com/huggingface/transformers/pull/44699", - "labels": [], - "merged": true, - "number": 44699, - "review_comments_count": 0, - "state": "closed", - "title": "Fix several based models' pipeline parallel support", - "updated_at": "2026-03-20T13:53:27Z" - }, - { - "additions": 1, - "author": "hmellor", - "author_association": "MEMBER", - "body_excerpt": "The typo in the `elif` chain meant that `image` and `video` modalidty encoders could not be set using this method. This PR fixes the typo so that they can.", - "changed_files": 1, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44698", - "created_at": "2026-03-14T11:18:54Z", - "deletions": 1, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44698/files", - "html_url": "https://github.com/huggingface/transformers/pull/44698", - "labels": [], - "merged": true, - "number": 44698, + "number": 44769, "review_comments_count": 0, "state": "closed", - "title": "Fix `set_encoder`", - "updated_at": "2026-03-14T13:42:00Z" - }, - { - "additions": 75, - "author": "LincolnBurrows2017", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## Description The `torch_float` function in `src/transformers/utils/generic.py` was incorrectly returning `int(x)` in two places where it should return `float(x)`: 1. When torch is not available (fallback case) 2. When not in a tracing co\u2026", - "changed_files": 4, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44697", - "created_at": "2026-03-14T10:44:12Z", - "deletions": 25, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44697/files", - "html_url": "https://github.com/huggingface/transformers/pull/44697", - "labels": [], - "merged": false, - "number": 44697, - "review_comments_count": 1, - "state": "open", - "title": "fix: torch_float should return float, not int", - "updated_at": "2026-03-17T19:29:02Z" + "title": "Fix: Handle empty lists in video_utils functions", + "updated_at": "2026-03-18T13:15:55Z" }, { - "additions": 19, - "author": "hmellor", + "additions": 20, + "author": "michaelbenayoun", "author_association": "MEMBER", - "body_excerpt": "In configs, `base_model_pp_plan` and `base_model_tp_plan` default to `None` In models, `_pp_plan` and `_tp_plan` _look like_ they default to `None` based on the class variables, but will actually always be a dict because of `post_init`. Th\u2026", + "body_excerpt": "The function `add_tensor_parallel_hooks_to_module` has unused parameters, in this PR we: - Remove `tp_plan`, which is not used. - Remove `parameter_name` which is not used - Remove `layer_name`. This parameter is only used for logging purp\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44696", - "created_at": "2026-03-14T09:41:07Z", - "deletions": 13, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44696/files", - "html_url": "https://github.com/huggingface/transformers/pull/44696", - "labels": [], - "merged": true, - "number": 44696, - "review_comments_count": 5, - "state": "closed", - "title": "Fix `supports_{tp/pp}_plan`", - "updated_at": "2026-03-31T13:12:56Z" - }, - { - "additions": 4, - "author": "harshaljanjani", - "author_association": "CONTRIBUTOR", - "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Kyutai Speech-To-Text**: [The PR [processors] Unbloating simple processors](https://github.com/huggingface/transformers/pull/40377), [refactore\u2026", - "changed_files": 2, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 6, - "conversation_url": "https://github.com/huggingface/transformers/pull/44695", - "created_at": "2026-03-14T09:05:35Z", - "deletions": 4, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44695/files", - "html_url": "https://github.com/huggingface/transformers/pull/44695", - "labels": [], - "merged": true, - "number": 44695, - "review_comments_count": 3, - "state": "closed", - "title": "fix(testing): Fix Kyutai Speech-To-Text and LongCatFlash test failures on main CI", - "updated_at": "2026-04-09T15:41:05Z" - }, - { - "additions": 143, - "author": "LincolnBurrows2017", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagated from core config to text config. When loading `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the outer config gets `num_labels=1` but the inner `text_config` still ha\u2026", - "changed_files": 7, - "cluster_id": "cluster-44625-9", - "cluster_ids": [ - "cluster-44625-9" - ], - "cluster_role": "member", "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44693", - "created_at": "2026-03-14T05:43:00Z", - "deletions": 30, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44693/files", - "html_url": "https://github.com/huggingface/transformers/pull/44693", - "labels": [], - "merged": false, - "number": 44693, - "review_comments_count": 0, - "state": "closed", - "title": "fix: Propagate num_labels to text_config in Qwen3.5", - "updated_at": "2026-03-18T12:56:25Z" - }, - { - "additions": 18, - "author": "gambletan", - "author_association": "NONE", - "body_excerpt": "## Summary Fixes #44514. `Qwen2_5_VLProcessor.apply_chat_template` crashes with `ValueError` when called with batched input and `padding=False` (the default). The root cause is `np.array(text_inputs[\"input_ids\"])` which fails when sequence\u2026", - "changed_files": 2, - "cluster_id": "cluster-44514-8", - "cluster_ids": [ - "cluster-44514-8" - ], - "cluster_role": "member", - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44692", - "created_at": "2026-03-14T04:14:38Z", - "deletions": 10, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44692/files", - "html_url": "https://github.com/huggingface/transformers/pull/44692", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44692, - "review_comments_count": 0, - "state": "closed", - "title": "fix: handle ragged input_ids in Qwen2_5_VLProcessor.apply_chat_template", - "updated_at": "2026-03-18T12:44:18Z" - }, - { - "additions": 23, - "author": "gambletan", - "author_association": "NONE", - "body_excerpt": "## Summary - Fixes `num_labels` (and `id2label`/`label2id`) not being propagated from the outer `Qwen3_5Config` to its inner `text_config` when passed via `AutoConfig.from_pretrained(..., num_labels=1)`. - When `text_config` is `None` or a\u2026", - "changed_files": 2, - "cluster_id": "cluster-44625-9", - "cluster_ids": [ - "cluster-44625-9" - ], - "cluster_role": "member", - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44691", - "created_at": "2026-03-14T04:10:54Z", - "deletions": 0, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44691/files", - "html_url": "https://github.com/huggingface/transformers/pull/44691", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44691, - "review_comments_count": 0, - "state": "closed", - "title": "Fix Qwen3.5 num_labels not propagated to text_config", - "updated_at": "2026-03-18T12:57:19Z" - }, - { - "additions": 6, - "author": "gambletan", - "author_association": "NONE", - "body_excerpt": "## Summary Fixes #44360 The `GlmMoeDsaIndexer` is missing a ReLU activation on the per-head dot-product scores before the weighted sum across heads. The reference DeepSeek V3.2 implementation applies ReLU inside the `fp8_index` kernel: ```\u2026", - "changed_files": 2, - "cluster_id": "cluster-44360-6", - "cluster_ids": [ - "cluster-44360-6" - ], - "cluster_role": "member", - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44690", - "created_at": "2026-03-14T03:44:37Z", - "deletions": 0, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44690/files", - "html_url": "https://github.com/huggingface/transformers/pull/44690", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44690, - "review_comments_count": 0, - "state": "closed", - "title": "Fix missing ReLU in GLM-MOE-DSA indexer scoring", - "updated_at": "2026-03-18T12:40:23Z" - }, - { - "additions": 141, - "author": "LincolnBurrows2017", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but text_config still has default `num_labels=2`. Thi\u2026", - "changed_files": 6, - "cluster_id": "cluster-44625-9", - "cluster_ids": [ - "cluster-44625-9" - ], - "cluster_role": "member", - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44688", - "created_at": "2026-03-14T00:40:50Z", - "deletions": 23, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44688/files", - "html_url": "https://github.com/huggingface/transformers/pull/44688", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44688, - "review_comments_count": 0, - "state": "closed", - "title": "fix: Propagate num_labels to text_config in Qwen models", - "updated_at": "2026-03-18T12:56:41Z" - }, - { - "additions": 8, - "author": "vxa8502", - "author_association": "NONE", - "body_excerpt": "Fixes partial #32937 Adds explicit `position_ids` threading through GPT-Neo's attention layers to enable flash attention's packed sequence optimization. ## Context GPT-Neo uses learned absolute position embeddings (`wpe`) applied at the mo\u2026", - "changed_files": 1, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44687", - "created_at": "2026-03-13T23:28:55Z", - "deletions": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44768", + "created_at": "2026-03-16T18:29:52Z", + "deletions": 9, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44687/files", - "html_url": "https://github.com/huggingface/transformers/pull/44687", - "labels": [ - "Code agent slop" - ], - "merged": false, - "number": 44687, - "review_comments_count": 0, + "files_url": "https://github.com/huggingface/transformers/pull/44768/files", + "html_url": "https://github.com/huggingface/transformers/pull/44768", + "labels": [], + "merged": true, + "number": 44768, + "review_comments_count": 3, "state": "closed", - "title": "Add explicit position_ids to GPT-Neo attention layers", - "updated_at": "2026-03-18T13:06:49Z" + "title": "Remove unused parameters and improve add_tensor_parallel_hooks_t\u2026", + "updated_at": "2026-04-09T17:11:55Z" }, { - "additions": 615, - "author": "tejasae-afk", - "author_association": "NONE", - "body_excerpt": "During an automated code review of src/transformers/models/marian/convert_marian_to_pytorch.py, the following issue was identified. Use safe_load in convert marian to pytorch. yaml.load on untrusted input can construct arbitrary Python obj\u2026", - "changed_files": 80, + "additions": 11, + "author": "tarekziade", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? EuroBertConfig was missing `@strict(accept_kwargs=True)` unlike its parent LlamaConfig, causing failures when reloading saved configs that include extra keys like `architectures`. Also fixed the test helper passing\u2026", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44686", - "created_at": "2026-03-13T21:22:07Z", - "deletions": 259, + "conversation_url": "https://github.com/huggingface/transformers/pull/44767", + "created_at": "2026-03-16T17:31:26Z", + "deletions": 5, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44686/files", - "html_url": "https://github.com/huggingface/transformers/pull/44686", + "files_url": "https://github.com/huggingface/transformers/pull/44767/files", + "html_url": "https://github.com/huggingface/transformers/pull/44767", "labels": [], - "merged": false, - "number": 44686, - "review_comments_count": 0, + "merged": true, + "number": 44767, + "review_comments_count": 6, "state": "closed", - "title": "Use safe_load in convert marian to pytorch", - "updated_at": "2026-03-14T03:54:31Z" + "title": "Fix: Eurobert model was missing @strict decorator and invalid test kwargs", + "updated_at": "2026-03-16T19:02:31Z" }, { - "additions": 10, - "author": "ydshieh", + "additions": 26, + "author": "itazap", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? For tiny model creation script - new added model test files still miss this argument ...", - "changed_files": 3, + "body_excerpt": "for when remote code tries to import from `tokenization_xxx_fast`", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44685", - "created_at": "2026-03-13T20:53:41Z", - "deletions": 3, + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44766", + "created_at": "2026-03-16T17:30:23Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44685/files", - "html_url": "https://github.com/huggingface/transformers/pull/44685", + "files_url": "https://github.com/huggingface/transformers/pull/44766/files", + "html_url": "https://github.com/huggingface/transformers/pull/44766", "labels": [], "merged": true, - "number": 44685, + "number": 44766, "review_comments_count": 0, "state": "closed", - "title": "Fix more model tester missing `parent` issue", - "updated_at": "2026-03-13T21:03:46Z" + "title": "support xxxFast alias in v5 tokenizers", + "updated_at": "2026-03-18T13:40:05Z" }, { - "additions": 41, - "author": "ntenenz", + "additions": 19, + "author": "harshaljanjani", "author_association": "CONTRIBUTOR", - "body_excerpt": "\u2026 # What does this PR do? In torch versions >= 2.9.0, it requests the lse from flex_attenetion using `AuxRequest` instead of the deprecated `return_lse`, which triggers a warning and can break tracing. Fixes #44683 ## Before submitting - [\u2026", - "changed_files": 1, + "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **PaliGemma 2:** The [PaliGemma 1 test class](https://github.com/huggingface/transformers/blob/main/tests/models/paligemma/test_modeling_paligemm\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44684", - "created_at": "2026-03-13T20:16:35Z", - "deletions": 5, + "comments_count": 5, + "conversation_url": "https://github.com/huggingface/transformers/pull/44765", + "created_at": "2026-03-16T17:26:22Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44684/files", - "html_url": "https://github.com/huggingface/transformers/pull/44684", + "files_url": "https://github.com/huggingface/transformers/pull/44765/files", + "html_url": "https://github.com/huggingface/transformers/pull/44765", "labels": [], "merged": true, - "number": 44684, - "review_comments_count": 8, + "number": 44765, + "review_comments_count": 0, "state": "closed", - "title": "update flex attention to use `return_aux` instead of `return_lse` when torch verison >= 2.9", - "updated_at": "2026-03-18T11:44:18Z" + "title": "fix(testing): Fix PaliGemma 2 and PaddleOCR-VL test failures on main", + "updated_at": "2026-03-20T13:55:55Z" }, { - "additions": 301, - "author": "SunMarc", + "additions": 12, + "author": "tarekziade", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Llama cpp integration in transformers serve. Minor changes to add llama.cpp integration Mostly changes on serve to fix latency for streaming and non streaming", - "changed_files": 2, + "body_excerpt": "# What does this PR do? Fixes the siglip import. that was also crashing the test fetcher", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44682", - "created_at": "2026-03-13T18:52:41Z", - "deletions": 73, + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44764", + "created_at": "2026-03-16T17:15:40Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44682/files", - "html_url": "https://github.com/huggingface/transformers/pull/44682", + "files_url": "https://github.com/huggingface/transformers/pull/44764/files", + "html_url": "https://github.com/huggingface/transformers/pull/44764", "labels": [], - "merged": false, - "number": 44682, - "review_comments_count": 0, - "state": "open", - "title": "transformers serve + llamacpp", - "updated_at": "2026-03-14T07:05:29Z" + "merged": true, + "number": 44764, + "review_comments_count": 2, + "state": "closed", + "title": "fix: sig lip import", + "updated_at": "2026-03-16T17:38:41Z" }, { - "additions": 47, - "author": "dacorvo", + "additions": 17, + "author": "xenova", "author_association": "MEMBER", - "body_excerpt": "Fixes #44679 ## Summary - Custom attention kernels registered via `load_and_register_attn_kernel` currently get hardcoded `flash_attention_2` mask dispatch, which produces 2D or `None` masks - Kernels that need SDPA-style 4D boolean masks\u2026", - "changed_files": 2, + "body_excerpt": "# What does this PR do? Adds support for MLP mixers, used by [nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16). Previously, it would crash because it would not recognize the `-` char in t\u2026", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44680", - "created_at": "2026-03-13T17:55:54Z", - "deletions": 1, + "comments_count": 7, + "conversation_url": "https://github.com/huggingface/transformers/pull/44763", + "created_at": "2026-03-16T17:04:36Z", + "deletions": 5, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44680/files", - "html_url": "https://github.com/huggingface/transformers/pull/44680", + "files_url": "https://github.com/huggingface/transformers/pull/44763/files", + "html_url": "https://github.com/huggingface/transformers/pull/44763", "labels": [], "merged": false, - "number": 44680, - "review_comments_count": 12, - "state": "open", - "title": "Allow kernel modules to declare their preferred mask function", - "updated_at": "2026-03-19T11:27:09Z" + "number": 44763, + "review_comments_count": 1, + "state": "closed", + "title": "[nemotron_h] Add support for MLP mixers", + "updated_at": "2026-04-14T13:46:14Z" }, { - "additions": 9, - "author": "JokeYoonic", + "additions": 4, + "author": "BillionClaw", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Problem: - On macOS ARM64 + Python 3.13 + transformers 5.x, GPT-2 model's lm_head forward pass produces NaN/Inf values during inference - Root cause: lm_head.weight is tied to transformer.wte.weight, and the shared memory reference causes\u2026", + "body_excerpt": "XLNet.relative_positional_encoding creates intermediate tensors on CPU every forward pass because torch.arange was missing the device parameter. This causes unnecessary CPU-GPU transfers when running on CUDA. Added device=self.device to al\u2026", "changed_files": 1, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44676", - "created_at": "2026-03-13T16:28:01Z", - "deletions": 2, + "cluster_id": "cluster-44737-6", + "cluster_ids": [ + "cluster-44737-6" + ], + "cluster_role": "member", + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44762", + "created_at": "2026-03-16T16:17:54Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44676/files", - "html_url": "https://github.com/huggingface/transformers/pull/44676", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44762/files", + "html_url": "https://github.com/huggingface/transformers/pull/44762", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44676, + "number": 44762, "review_comments_count": 0, - "state": "open", - "title": "fix(gpt2): Resolve NaN/Inf issue in lm_head on Python 3.13 with tied weights", - "updated_at": "2026-03-18T17:16:49Z" + "state": "closed", + "title": "fix: Cache XLNet relative_positional_encoding to avoid CPU computation", + "updated_at": "2026-03-18T15:16:14Z" }, { - "additions": 32, - "author": "stevhliu", + "additions": 152, + "author": "tarekziade", "author_association": "MEMBER", - "body_excerpt": "properly formats the `ContinuousBatchingConfig` below: \"Screenshot", - "changed_files": 1, + "body_excerpt": "# What does this PR do? This adds rule 10: ``` Direct config definitions must use @strict(accept_kwargs=True). ```", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44675", - "created_at": "2026-03-13T16:10:28Z", - "deletions": 14, + "conversation_url": "https://github.com/huggingface/transformers/pull/44761", + "created_at": "2026-03-16T16:05:03Z", + "deletions": 7, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44675/files", - "html_url": "https://github.com/huggingface/transformers/pull/44675", + "files_url": "https://github.com/huggingface/transformers/pull/44761/files", + "html_url": "https://github.com/huggingface/transformers/pull/44761", "labels": [], "merged": true, - "number": 44675, - "review_comments_count": 0, + "number": 44761, + "review_comments_count": 7, "state": "closed", - "title": "[docs] cb config", - "updated_at": "2026-03-13T23:15:04Z" + "title": "model-linter: Added rule 10", + "updated_at": "2026-03-17T08:52:19Z" }, { - "additions": 408, - "author": "Rocketknight1", + "additions": 2090, + "author": "juliendenize", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? save locally --> local locally) ```\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44659", - "created_at": "2026-03-13T11:08:13Z", + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44730", + "created_at": "2026-03-15T20:44:32Z", "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44659/files", - "html_url": "https://github.com/huggingface/transformers/pull/44659", + "files_url": "https://github.com/huggingface/transformers/pull/44730/files", + "html_url": "https://github.com/huggingface/transformers/pull/44730", "labels": [], - "merged": false, - "number": 44659, - "review_comments_count": 0, - "state": "open", - "title": "docs: remove outdated use_diff docstring from DistributedConfig.to_js\u2026", - "updated_at": "2026-03-13T13:42:29Z" + "merged": true, + "number": 44730, + "review_comments_count": 6, + "state": "closed", + "title": "Fix `mlcd` auto config/model/mapping issues", + "updated_at": "2026-03-16T12:12:30Z" }, { - "additions": 18, - "author": "Charly21r", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "# What does this PR do? Fixes an issue where the `.modelcard` attribute of a pipeline is not initialized. Without this fix, calling `save_pretrained` on a pipeline (e.g., ASR pipeline) raises an `AttributeError` because `.modelcard` does n\u2026", - "changed_files": 2, + "additions": 214, + "author": "xenova", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? This PR introduces a helper utility function, `int_div_ceil`, which performs `math.ceil(a / b)` for non-negative integer operands. This is necessary as the current approach is both error-prone and imprecise (especia\u2026", + "changed_files": 58, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44658", - "created_at": "2026-03-13T10:36:22Z", - "deletions": 0, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44729", + "created_at": "2026-03-15T20:29:38Z", + "deletions": 225, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44658/files", - "html_url": "https://github.com/huggingface/transformers/pull/44658", + "files_url": "https://github.com/huggingface/transformers/pull/44729/files", + "html_url": "https://github.com/huggingface/transformers/pull/44729", "labels": [], "merged": false, - "number": 44658, + "number": 44729, "review_comments_count": 0, - "state": "closed", - "title": "Fix: fix Pipeline's save_pretrained method (issue #44655)", - "updated_at": "2026-03-13T14:08:27Z" + "state": "open", + "title": "Avoid floating point math for ceil operations", + "updated_at": "2026-03-15T20:49:34Z" }, { - "additions": 1, - "author": "kaixuanliu", - "author_association": "CONTRIBUTOR", - "body_excerpt": "@ArthurZucker @Cyrilvallez pls help review. Thx!", - "changed_files": 1, + "additions": 88, + "author": "ajmeese7", + "author_association": "NONE", + "body_excerpt": "# What does this PR do? Fixes a GPU memory leak in `Bnb4bitQuantize.convert()` where float16 source tensors are never freed during 4-bit quantized model loading via `from_pretrained`, causing OOM on models whose float16 size exceeds GPU VR\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44657", - "created_at": "2026-03-13T10:25:07Z", - "deletions": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44728", + "created_at": "2026-03-15T19:56:44Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44657/files", - "html_url": "https://github.com/huggingface/transformers/pull/44657", + "files_url": "https://github.com/huggingface/transformers/pull/44728/files", + "html_url": "https://github.com/huggingface/transformers/pull/44728", "labels": [], - "merged": true, - "number": 44657, + "merged": false, + "number": 44728, "review_comments_count": 0, "state": "closed", - "title": "fix bug embedding_size mismatch with hidden_size in electra model test", - "updated_at": "2026-04-01T08:24:54Z" + "title": "Fix float16 memory leak during 4-bit quantized model loading", + "updated_at": "2026-03-16T20:53:54Z" }, { - "additions": 80, - "author": "juliendenize", - "author_association": "CONTRIBUTOR", - "body_excerpt": "# What does this PR do? This PR add support to mistral-common v1.10.0. This involves: - reasoning effort feature - Python 3.14 Also add `image_sizes` in `apply_chat_template` output to match what is expected by standard processor.", - "changed_files": 4, + "additions": 202, + "author": "LincolnBurrows2017", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "Fixed issue where kwargs like force_download, proxies, token were not being passed to cached_file function.", + "changed_files": 11, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44656", - "created_at": "2026-03-13T10:24:11Z", - "deletions": 15, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44727", + "created_at": "2026-03-15T19:41:24Z", + "deletions": 33, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44656/files", - "html_url": "https://github.com/huggingface/transformers/pull/44656", - "labels": [], - "merged": true, - "number": 44656, - "review_comments_count": 1, + "files_url": "https://github.com/huggingface/transformers/pull/44727/files", + "html_url": "https://github.com/huggingface/transformers/pull/44727", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44727, + "review_comments_count": 0, "state": "closed", - "title": "[MistralCommonBackend] Upgrade mistral-common to v1.10.0", - "updated_at": "2026-03-16T12:46:29Z" + "title": "fix: AutoProcessor.from_pretrained not passing kwargs to cached_file", + "updated_at": "2026-03-18T13:15:46Z" }, { - "additions": 13, - "author": "jiqing-feng", - "author_association": "CONTRIBUTOR", - "body_excerpt": "This PR fixes the AWQ test suite to align with the GPTQModel migration (following #41567 and #42776). ### Changes - **Fix `replace_with_awq_linear` return value**: The function now returns the model directly instead of a tuple `(model, _)`\u2026", - "changed_files": 1, + "additions": 198, + "author": "LincolnBurrows2017", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "Replaced bare except clause with except Exception in _safe_convert_tensor function to follow Python best practices (PEP 8).", + "changed_files": 10, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44654", - "created_at": "2026-03-13T07:31:19Z", - "deletions": 13, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44725", + "created_at": "2026-03-15T17:41:18Z", + "deletions": 29, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44654/files", - "html_url": "https://github.com/huggingface/transformers/pull/44654", - "labels": [], - "merged": true, - "number": 44654, + "files_url": "https://github.com/huggingface/transformers/pull/44725/files", + "html_url": "https://github.com/huggingface/transformers/pull/44725", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44725, "review_comments_count": 0, "state": "closed", - "title": "Fix AWQ tests for GPTQModel migration", - "updated_at": "2026-03-13T16:28:14Z" + "title": "fix: replace bare except with Exception in Fuyu image processing", + "updated_at": "2026-03-18T13:16:22Z" }, { - "additions": 18, - "author": "kaixuanliu", - "author_association": "CONTRIBUTOR", - "body_excerpt": "@zucchini-nlp, can you help review? Thx! unit tests to reproduce this bug: `tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py::Phi4MultimodalIntegrationTest::test_audio_text_generation`", - "changed_files": 1, + "additions": 6, + "author": "ydshieh", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? TO be explained.", + "changed_files": 5, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44653", - "created_at": "2026-03-13T07:14:25Z", - "deletions": 9, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44653/files", - "html_url": "https://github.com/huggingface/transformers/pull/44653", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44724", + "created_at": "2026-03-15T17:14:12Z", + "deletions": 5, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/44724/files", + "html_url": "https://github.com/huggingface/transformers/pull/44724", "labels": [], "merged": false, - "number": 44653, - "review_comments_count": 7, - "state": "closed", - "title": "Fix `AutoImageProcessor` to correctly detect local implementation whe\u2026", - "updated_at": "2026-04-09T02:32:53Z" + "number": 44724, + "review_comments_count": 1, + "state": "open", + "title": "Fix some missing / incorrect entries in auto files", + "updated_at": "2026-03-16T09:59:56Z" }, { - "additions": 8, - "author": "gambletan", + "additions": 12, + "author": "aashirpersonal", "author_association": "NONE", - "body_excerpt": "## Summary Fixes #44623 `processor.save_pretrained()` in v5 only saves the unified `processor_config.json` with nested sub-processor configs, but does not save standalone config files like `preprocessor_config.json` for the image processor\u2026", - "changed_files": 1, + "body_excerpt": "## Summary This PR fixes #44716 by exposing and forwarding `interpolate_pos_encoding` through the Pixio embedding/model call chain so the option is actually usable from `PixioModel.forward()`. ### Changes - Added `interpolate_pos_encoding:\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44652", - "created_at": "2026-03-13T05:38:10Z", - "deletions": 0, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44723", + "created_at": "2026-03-15T16:52:03Z", + "deletions": 6, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44652/files", - "html_url": "https://github.com/huggingface/transformers/pull/44652", + "files_url": "https://github.com/huggingface/transformers/pull/44723/files", + "html_url": "https://github.com/huggingface/transformers/pull/44723", "labels": [ "Code agent slop" ], "merged": false, - "number": 44652, + "number": 44723, "review_comments_count": 0, "state": "closed", - "title": "Fix processor.save_pretrained missing sub-processor config files", - "updated_at": "2026-03-13T12:03:37Z" + "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel", + "updated_at": "2026-03-18T15:05:52Z" }, { - "additions": 10, - "author": "gambletan", - "author_association": "NONE", - "body_excerpt": "## Summary Fixes #44625 When passing `num_labels` to `AutoConfig.from_pretrained` for Qwen3.5, the value is set on the outer `Qwen3_5Config` but not propagated to `text_config`. This causes `AutoModelForSequenceClassification` to use the d\u2026", + "additions": 38, + "author": "chandan11248", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## What does this PR do? Migrates the GPT-J model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as described in #43979. ### Changes - Added `_can_record_outputs` to `GPTJPreTrained\u2026", "changed_files": 2, - "cluster_id": "cluster-44625-9", + "cluster_id": "cluster-43979-24", "cluster_ids": [ - "cluster-44625-9" + "cluster-43979-24" ], - "cluster_role": "canonical", - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44651", - "created_at": "2026-03-13T05:35:29Z", - "deletions": 0, + "cluster_role": "member", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44722", + "created_at": "2026-03-15T15:33:25Z", + "deletions": 110, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44651/files", - "html_url": "https://github.com/huggingface/transformers/pull/44651", + "files_url": "https://github.com/huggingface/transformers/pull/44722/files", + "html_url": "https://github.com/huggingface/transformers/pull/44722", "labels": [], "merged": false, - "number": 44651, + "number": 44722, "review_comments_count": 0, - "state": "closed", - "title": "Fix Qwen3.5 num_labels not propagated to text_config", - "updated_at": "2026-03-13T13:39:43Z" + "state": "open", + "title": "Refactor gptj output tracing to use standardized decorators", + "updated_at": "2026-03-19T18:12:59Z" }, { - "additions": 188, - "author": "shaealh", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Closes #44593 ## Summary - use generation_input_ids/generation_attention_mask when provided for decoder-only models - otherwise infer prompt from leading -100 labels and build left-padded prompt batch - return completion tokens for decoder\u2026", - "changed_files": 2, + "additions": 4, + "author": "rsmed31", + "author_association": "NONE", + "body_excerpt": "## Summary Fixes #44716 `PixioPatchEmbeddings.forward` already accepted `interpolate_pos_encoding` but it was silently dropped \u2014 never passed from `PixioEmbeddings.forward` or `PixioModel.forward`, making the parameter effectively unusable\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44650", - "created_at": "2026-03-13T05:33:59Z", - "deletions": 6, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44718", + "created_at": "2026-03-14T23:57:14Z", + "deletions": 3, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44650/files", - "html_url": "https://github.com/huggingface/transformers/pull/44650", + "files_url": "https://github.com/huggingface/transformers/pull/44718/files", + "html_url": "https://github.com/huggingface/transformers/pull/44718", "labels": [], "merged": false, - "number": 44650, + "number": 44718, "review_comments_count": 0, - "state": "open", - "title": "Fix Seq2SeqTrainer generation path for decoder-only models", - "updated_at": "2026-04-02T21:23:53Z" + "state": "closed", + "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel", + "updated_at": "2026-03-15T17:58:58Z" }, { - "additions": 12, - "author": "gambletan", - "author_association": "NONE", - "body_excerpt": "## Summary Fixes #44623 In v5.3.0, `ProcessorMixin.save_pretrained()` no longer calls `save_pretrained()` on non-tokenizer components (e.g. `image_processor`, `feature_extractor`). This means their individual config files (e.g. `preprocess\u2026", - "changed_files": 1, + "additions": 15, + "author": "ydshieh", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? As discussed internally, some component model classes didn't specify the correct config classes. This PR fixes them (those I could found - because the tiny model creation script fails due to those mistakes).", + "changed_files": 7, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44649", - "created_at": "2026-03-13T05:22:42Z", - "deletions": 207, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/44715", + "created_at": "2026-03-14T21:11:52Z", + "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44649/files", - "html_url": "https://github.com/huggingface/transformers/pull/44649", + "files_url": "https://github.com/huggingface/transformers/pull/44715/files", + "html_url": "https://github.com/huggingface/transformers/pull/44715", "labels": [], - "merged": false, - "number": 44649, + "merged": true, + "number": 44715, "review_comments_count": 0, "state": "closed", - "title": "fix: ensure processor.save_pretrained saves all component files", - "updated_at": "2026-03-13T05:36:54Z" + "title": "Fix missing / incorrect `config` class in some model class definitions", + "updated_at": "2026-03-15T11:19:51Z" }, { - "additions": 0, - "author": "gambletan", - "author_association": "NONE", - "body_excerpt": "## Summary Fixes #44625 When `num_labels` or `id2label` are passed to `Qwen3_5Config` (e.g. via `AutoConfig.from_pretrained(model_name, num_labels=1)`), they are only set on the outer composite config but **not forwarded to `text_config`**\u2026", - "changed_files": 0, + "additions": 181, + "author": "LincolnBurrows2017", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating from core config to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but `text_config` still has default\u2026", + "changed_files": 8, "cluster_id": "cluster-44625-9", "cluster_ids": [ "cluster-44625-9" ], "cluster_role": "member", - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44648", - "created_at": "2026-03-13T05:22:03Z", - "deletions": 0, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44714", + "created_at": "2026-03-14T20:42:46Z", + "deletions": 26, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44648/files", - "html_url": "https://github.com/huggingface/transformers/pull/44648", + "files_url": "https://github.com/huggingface/transformers/pull/44714/files", + "html_url": "https://github.com/huggingface/transformers/pull/44714", "labels": [], "merged": false, - "number": 44648, + "number": 44714, "review_comments_count": 0, "state": "closed", - "title": "fix: propagate num_labels to text_config in Qwen3.5", - "updated_at": "2026-03-13T05:33:26Z" + "title": "fix: propagate num_labels to text_config for Qwen models", + "updated_at": "2026-03-18T12:56:27Z" }, { - "additions": 9, - "author": "kaixuanliu", - "author_association": "CONTRIBUTOR", - "body_excerpt": "@remi-or @ArthurZucker @McPatate pls help review, thx!", + "additions": 15, + "author": "kulkarni-rohan", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "Applies the output tracing refactor to ColQwen2ForRetrieval as part of the broader effort tracked in issue #43979 to modernize output handling across all models in the library. Changes in both modular_colqwen2.py and modeling_colqwen2.py:\u2026", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44713", + "created_at": "2026-03-14T20:20:14Z", + "deletions": 28, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44713/files", + "html_url": "https://github.com/huggingface/transformers/pull/44713", + "labels": [], + "merged": false, + "number": 44713, + "review_comments_count": 0, + "state": "open", + "title": "[ColQwen2] Refactor output tracing (issue #43979)", + "updated_at": "2026-03-14T20:21:24Z" + }, + { + "additions": 2, + "author": "ydshieh", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? torch 2.11 is going to be released soon, but we still use 2.9. Let's update it to 2.10 so at least a run with torch 2.10, before we update to torch 2.11 later.", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44712", + "created_at": "2026-03-14T20:18:01Z", + "deletions": 2, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44712/files", + "html_url": "https://github.com/huggingface/transformers/pull/44712", + "labels": [], + "merged": true, + "number": 44712, + "review_comments_count": 0, + "state": "closed", + "title": "Update Nvidia CI docker file to use torch 2.10", + "updated_at": "2026-03-14T20:29:04Z" + }, + { + "additions": 339, + "author": "anuq", + "author_association": "NONE", + "body_excerpt": "## What does this PR do? Fixes #35141. When `tie_word_embeddings=False`, calling `resize_token_embeddings()` creates a new `nn.Linear` for the LM head via `_get_resized_lm_head()`. The new module's weight and bias tensors do **not** carry\u2026", + "changed_files": 4, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44647", - "created_at": "2026-03-13T04:55:26Z", - "deletions": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44711", + "created_at": "2026-03-14T19:21:21Z", + "deletions": 205, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44647/files", - "html_url": "https://github.com/huggingface/transformers/pull/44647", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44711/files", + "html_url": "https://github.com/huggingface/transformers/pull/44711", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44647, + "number": 44711, "review_comments_count": 0, "state": "closed", - "title": "Add more generic device support for continuous batching", - "updated_at": "2026-04-09T02:32:51Z" + "title": "fix: mark new lm_head params as `_is_hf_initialized` after `resize_token_embeddings`", + "updated_at": "2026-03-20T13:36:58Z" }, { - "additions": 4, - "author": "LincolnBurrows2017", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Fixed typo in Qwen3.5 and Qwen3-VL-MoE model files ```diff - Since Qwen3.5 use timestamps to seperate videos + Since Qwen3.5 use timestamps to separate videos ```", - "changed_files": 2, + "additions": 12, + "author": "he-yufeng", + "author_association": "CONTRIBUTOR", + "body_excerpt": "## What does this PR do? Fixes `AutoProcessor.from_pretrained` silently dropping hub kwargs like `force_download`, `cache_dir`, `token`, `revision`, etc. ### The bug The existing code on line ~300 filters kwargs using `inspect.signature(ca\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44646", - "created_at": "2026-03-13T04:48:06Z", - "deletions": 4, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/44710", + "created_at": "2026-03-14T18:33:53Z", + "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44646/files", - "html_url": "https://github.com/huggingface/transformers/pull/44646", + "files_url": "https://github.com/huggingface/transformers/pull/44710/files", + "html_url": "https://github.com/huggingface/transformers/pull/44710", "labels": [], - "merged": false, - "number": 44646, + "merged": true, + "number": 44710, "review_comments_count": 0, - "state": "open", - "title": "Fix typo: seperate -> separate", - "updated_at": "2026-03-13T15:27:08Z" + "state": "closed", + "title": "Fix AutoProcessor.from_pretrained silently dropping hub kwargs", + "updated_at": "2026-03-25T18:13:14Z" }, { - "additions": 4, - "author": "sywangyi", - "author_association": "CONTRIBUTOR", - "body_excerpt": "- Intel XPU: @IlyasMoutawwakil ``` ======================================================================== FAILURES ======================================================================== _________________________________________________\u2026", - "changed_files": 3, + "additions": 6778, + "author": "LucasMa2025", + "author_association": "FIRST_TIMER", + "body_excerpt": "# \ud83c\udf9b\ufe0f Add Configurable Generation Scheduler and State Machine for `generate()` ## Summary This PR introduces a **fully optional, zero-intrusion** Generation Scheduler (`GenerationScheduler`) and explicit state machine (`GenerationStateMachi\u2026", + "changed_files": 15, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44645", - "created_at": "2026-03-13T02:53:31Z", - "deletions": 4, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44645/files", - "html_url": "https://github.com/huggingface/transformers/pull/44645", + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44708", + "created_at": "2026-03-14T17:13:34Z", + "deletions": 7, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/44708/files", + "html_url": "https://github.com/huggingface/transformers/pull/44708", "labels": [], - "merged": true, - "number": 44645, + "merged": false, + "number": 44708, "review_comments_count": 0, "state": "closed", - "title": "fix Image.open failure in case \"tests/models/prompt_depth_anything/te\u2026", - "updated_at": "2026-03-27T11:11:05Z" + "title": "Add Configurable Generation Scheduler and State Machine for `generate()`", + "updated_at": "2026-03-14T19:19:11Z" }, { - "additions": 2, - "author": "sywangyi", - "author_association": "CONTRIBUTOR", - "body_excerpt": "\u2026MXFP4PseudoquantTest::test_quantized_model fail in xpu ## Who can review? - quantization: @SunMarc - Intel XPU: @IlyasMoutawwakil", + "additions": 3, + "author": "saivedant169", + "author_association": "NONE", + "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `MptForCausalLM.forward()` and `MptModel.forward()`, bringing MPT in line with other CausalLM models. Same rationale as the Bloom PR (#44706) \u2014 M\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44644", - "created_at": "2026-03-13T02:02:45Z", - "deletions": 1, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44707", + "created_at": "2026-03-14T17:12:16Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44644/files", - "html_url": "https://github.com/huggingface/transformers/pull/44644", - "labels": [], - "merged": true, - "number": 44644, - "review_comments_count": 6, + "files_url": "https://github.com/huggingface/transformers/pull/44707/files", + "html_url": "https://github.com/huggingface/transformers/pull/44707", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44707, + "review_comments_count": 0, "state": "closed", - "title": "fix tests/quantization/fp_quant_integration/test_fp_quant.py::FPQuant\u2026", - "updated_at": "2026-03-27T14:14:13Z" + "title": "Add position_ids to MptForCausalLM forward pass", + "updated_at": "2026-03-18T13:39:36Z" }, { - "additions": 5, - "author": "joshuaswanson", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "The generic `labels` docstring in `ModelArgs` says \"masked language modeling loss\" and doesn't mention that causal LM models shift labels internally. This has tripped up a lot of users who pre-shift their labels and end up training next-ne\u2026", + "additions": 3, + "author": "saivedant169", + "author_association": "NONE", + "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `BloomForCausalLM.forward()` and `BloomModel.forward()`, bringing Bloom in line with other CausalLM models like Llama, Falcon, Gemma, and Mistral\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44642", - "created_at": "2026-03-12T23:47:11Z", - "deletions": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44706", + "created_at": "2026-03-14T17:09:11Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44642/files", - "html_url": "https://github.com/huggingface/transformers/pull/44642", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44706/files", + "html_url": "https://github.com/huggingface/transformers/pull/44706", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44642, + "number": 44706, "review_comments_count": 0, - "state": "open", - "title": "Clarify that causal LM labels are shifted internally", - "updated_at": "2026-03-13T00:02:30Z" + "state": "closed", + "title": "Add position_ids to BloomForCausalLM forward pass", + "updated_at": "2026-03-18T13:39:51Z" }, { - "additions": 1, - "author": "kmbhattt-aws", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "# What does this PR do? Issue: A full 4D attention mask of shape `[1, 1, seq_len, seq_len]` is being created during attention, even when not using alibi for positional embeddings. - This occupied extra memory during training. Root Cause: T\u2026", + "additions": 14, + "author": "saivedant169", + "author_association": "NONE", + "body_excerpt": "Fixes part of #32937 ## What does this PR do? RoFormer introduced rotary position embeddings, but its `ForCausalLM` forward method doesn't accept `position_ids` \u2014 which means callers can't specify custom positions for packed sequences or f\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44641", - "created_at": "2026-03-12T23:01:11Z", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44705", + "created_at": "2026-03-14T16:48:06Z", "deletions": 1, - "draft": true, - "files_url": "https://github.com/huggingface/transformers/pull/44641/files", - "html_url": "https://github.com/huggingface/transformers/pull/44641", - "labels": [], + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44705/files", + "html_url": "https://github.com/huggingface/transformers/pull/44705", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44641, - "review_comments_count": 1, - "state": "open", - "title": "Conditinally passing and_mask_function arg to create_causal_mask ", - "updated_at": "2026-03-13T02:09:22Z" + "number": 44705, + "review_comments_count": 0, + "state": "closed", + "title": "Add position_ids to RoFormerForCausalLM forward pass", + "updated_at": "2026-03-18T13:40:05Z" }, { - "additions": 11, - "author": "michaelbenayoun", + "additions": 26, + "author": "vasqu", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? - Add `\"embedding_colwise\"` tp plan - Add register methods for `ParallelInterface`. Without it, we can register plans with the register method, but not the corresponding `plan_to_weight_dim` and `plan_to_bias_dim`.", - "changed_files": 1, + "body_excerpt": "As per title, it seems that the `cute` subfolder can be even distributed if you only install FA2 which implies something wrong. Now we check under the (normalized) distribution names", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44640", - "created_at": "2026-03-12T20:14:06Z", - "deletions": 0, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44703", + "created_at": "2026-03-14T14:46:02Z", + "deletions": 10, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44640/files", - "html_url": "https://github.com/huggingface/transformers/pull/44640", + "files_url": "https://github.com/huggingface/transformers/pull/44703/files", + "html_url": "https://github.com/huggingface/transformers/pull/44703", "labels": [], "merged": true, - "number": 44640, - "review_comments_count": 0, + "number": 44703, + "review_comments_count": 1, "state": "closed", - "title": "Add register method for `ParallelInterface`", - "updated_at": "2026-03-13T18:57:48Z" + "title": "[`FA`] Fix fa detection", + "updated_at": "2026-03-14T17:19:07Z" }, { - "additions": 24099, - "author": "ArthurZucker", - "author_association": "MEMBER", - "body_excerpt": "## Summary Fixes bugs introduced during the `__init__` \u2192 `@dataclass` conversion in #41250. All are incorrect default values caught by automated comparison of `__init__` signatures (main) vs dataclass fields (PR). | Model | Param | Was | S\u2026", - "changed_files": 931, + "additions": 148, + "author": "LincolnBurrows2017", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## What does this PR fix? The `rms_norm_eps` parameter in `MistralConfig` was incorrectly typed as `int | None` but defaults to `1e-6` which is a float. This parameter is passed to `MistralRMSNorm` which expects `eps: float`. ### Bug Detai\u2026", + "changed_files": 8, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/44639", - "created_at": "2026-03-12T16:49:54Z", - "deletions": 38773, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44702", + "created_at": "2026-03-14T14:41:15Z", + "deletions": 25, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44639/files", - "html_url": "https://github.com/huggingface/transformers/pull/44639", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44702/files", + "html_url": "https://github.com/huggingface/transformers/pull/44702", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44639, + "number": 44702, "review_comments_count": 0, "state": "closed", - "title": "Fix incorrect default values in config dataclass migration", - "updated_at": "2026-03-12T16:50:10Z" + "title": "fix: Correct rms_norm_eps type hint from int to float in MistralConfig", + "updated_at": "2026-03-18T13:00:12Z" }, { - "additions": 19, - "author": "Cyrilvallez", + "additions": 219, + "author": "hmellor", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/44614. This one is the result of a long debugging session and discussion with @vasqu. The issue is as follow: - Backbone ALWAYS need to c\u2026", - "changed_files": 3, + "body_excerpt": "These models have `base_model_pp_plan`s but currently do not work because the base model's forward pass depends on all the `layers` being `Qwen2VLDecoderLayer`. i.e. if one of the layers is removed/replaced with `Identity`, `decoder_layer.\u2026", + "changed_files": 52, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44638", - "created_at": "2026-03-12T16:19:49Z", - "deletions": 10, + "conversation_url": "https://github.com/huggingface/transformers/pull/44699", + "created_at": "2026-03-14T11:44:24Z", + "deletions": 148, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44638/files", - "html_url": "https://github.com/huggingface/transformers/pull/44638", + "files_url": "https://github.com/huggingface/transformers/pull/44699/files", + "html_url": "https://github.com/huggingface/transformers/pull/44699", "labels": [], "merged": true, - "number": 44638, - "review_comments_count": 2, + "number": 44699, + "review_comments_count": 0, "state": "closed", - "title": "Fix output capturing for Backbones", - "updated_at": "2026-03-12T17:11:32Z" + "title": "Fix several based models' pipeline parallel support", + "updated_at": "2026-03-20T13:53:27Z" }, { - "additions": 571, - "author": "tarekziade", + "additions": 1, + "author": "hmellor", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? - Adds an `httpx` tracer to gather metrics about network calls - Collect and store metrics and generates an artifact in CI - Can be used locally with `DEBUG_NETWORK` - Activated in CircleCI example of local run: ```\u2026", - "changed_files": 4, + "body_excerpt": "The typo in the `elif` chain meant that `image` and `video` modalidty encoders could not be set using this method. This PR fixes the typo so that they can.", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44636", - "created_at": "2026-03-12T15:25:10Z", - "deletions": 0, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44698", + "created_at": "2026-03-14T11:18:54Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44636/files", - "html_url": "https://github.com/huggingface/transformers/pull/44636", + "files_url": "https://github.com/huggingface/transformers/pull/44698/files", + "html_url": "https://github.com/huggingface/transformers/pull/44698", "labels": [], "merged": true, - "number": 44636, - "review_comments_count": 5, + "number": 44698, + "review_comments_count": 0, "state": "closed", - "title": "feat(ci): added a network debug report", - "updated_at": "2026-03-18T19:19:03Z" + "title": "Fix `set_encoder`", + "updated_at": "2026-03-14T13:42:00Z" }, { - "additions": 111, - "author": "RyanMullins", - "author_association": "CONTRIBUTOR", - "body_excerpt": "# What does this PR do? Per a conversion with @Cyrilvallez on Slack on how to make Gemma models more compatible with modular inheritance, this PR: * Updates Gemma models to use `nn.parameter.Buffer` instead of a `self.register_buffer()` fo\u2026", - "changed_files": 32, + "additions": 75, + "author": "LincolnBurrows2017", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## Description The `torch_float` function in `src/transformers/utils/generic.py` was incorrectly returning `int(x)` in two places where it should return `float(x)`: 1. When torch is not available (fallback case) 2. When not in a tracing co\u2026", + "changed_files": 4, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44635", - "created_at": "2026-03-12T14:47:46Z", - "deletions": 87, + "conversation_url": "https://github.com/huggingface/transformers/pull/44697", + "created_at": "2026-03-14T10:44:12Z", + "deletions": 25, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44635/files", - "html_url": "https://github.com/huggingface/transformers/pull/44635", + "files_url": "https://github.com/huggingface/transformers/pull/44697/files", + "html_url": "https://github.com/huggingface/transformers/pull/44697", "labels": [], "merged": false, - "number": 44635, - "review_comments_count": 0, + "number": 44697, + "review_comments_count": 1, "state": "open", - "title": "[Gemma] Modular-friendly buffers", - "updated_at": "2026-03-18T10:44:25Z" + "title": "fix: torch_float should return float, not int", + "updated_at": "2026-03-17T19:29:02Z" }, { - "additions": 30, - "author": "Cyrilvallez", + "additions": 19, + "author": "hmellor", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? As per the title", - "changed_files": 3, + "body_excerpt": "In configs, `base_model_pp_plan` and `base_model_tp_plan` default to `None` In models, `_pp_plan` and `_tp_plan` _look like_ they default to `None` based on the class variables, but will actually always be a dict because of `post_init`. Th\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44634", - "created_at": "2026-03-12T14:04:36Z", - "deletions": 6, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/44696", + "created_at": "2026-03-14T09:41:07Z", + "deletions": 13, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44634/files", - "html_url": "https://github.com/huggingface/transformers/pull/44634", + "files_url": "https://github.com/huggingface/transformers/pull/44696/files", + "html_url": "https://github.com/huggingface/transformers/pull/44696", "labels": [], "merged": true, - "number": 44634, - "review_comments_count": 1, + "number": 44696, + "review_comments_count": 5, "state": "closed", - "title": "Fix lfm2 kernel path", - "updated_at": "2026-03-12T15:00:59Z" + "title": "Fix `supports_{tp/pp}_plan`", + "updated_at": "2026-03-31T13:12:56Z" }, { - "additions": 26, - "author": "eustlb", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? As per title \ud83e\udd17", - "changed_files": 1, + "additions": 4, + "author": "harshaljanjani", + "author_association": "CONTRIBUTOR", + "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Kyutai Speech-To-Text**: [The PR [processors] Unbloating simple processors](https://github.com/huggingface/transformers/pull/40377), [refactore\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44633", - "created_at": "2026-03-12T13:35:48Z", - "deletions": 11, + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/44695", + "created_at": "2026-03-14T09:05:35Z", + "deletions": 4, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44633/files", - "html_url": "https://github.com/huggingface/transformers/pull/44633", + "files_url": "https://github.com/huggingface/transformers/pull/44695/files", + "html_url": "https://github.com/huggingface/transformers/pull/44695", "labels": [], "merged": true, - "number": 44633, - "review_comments_count": 2, + "number": 44695, + "review_comments_count": 3, "state": "closed", - "title": "[medasr] doc update", - "updated_at": "2026-03-16T09:39:50Z" + "title": "fix(testing): Fix Kyutai Speech-To-Text and LongCatFlash test failures on main CI", + "updated_at": "2026-04-09T15:41:05Z" }, { - "additions": 35, - "author": "Abdennacer-Badaoui", - "author_association": "MEMBER", - "body_excerpt": "Summary - Update test expectations for affected models - Add some needed dependencies - Fix TypeError: `GenerationMixin.prepare_inputs_for_generation()` got multiple values for argument 'next_sequence_length' in Qwen2.5-Omni talker by pass\u2026", - "changed_files": 6, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44632", - "created_at": "2026-03-12T13:32:33Z", - "deletions": 22, + "additions": 143, + "author": "LincolnBurrows2017", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagated from core config to text config. When loading `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the outer config gets `num_labels=1` but the inner `text_config` still ha\u2026", + "changed_files": 7, + "cluster_id": "cluster-44625-9", + "cluster_ids": [ + "cluster-44625-9" + ], + "cluster_role": "member", + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44693", + "created_at": "2026-03-14T05:43:00Z", + "deletions": 30, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44632/files", - "html_url": "https://github.com/huggingface/transformers/pull/44632", + "files_url": "https://github.com/huggingface/transformers/pull/44693/files", + "html_url": "https://github.com/huggingface/transformers/pull/44693", "labels": [], - "merged": true, - "number": 44632, - "review_comments_count": 6, + "merged": false, + "number": 44693, + "review_comments_count": 0, "state": "closed", - "title": "[AMD CI] Fix test failures across important models ", - "updated_at": "2026-03-17T14:58:10Z" + "title": "fix: Propagate num_labels to text_config in Qwen3.5", + "updated_at": "2026-03-18T12:56:25Z" }, { - "additions": 33, - "author": "RyanMullins", - "author_association": "CONTRIBUTOR", - "body_excerpt": "# What does this PR do? Updates the weights conversion scripts for Gemma to: * Use the new `SentencePieceExtractor` class to get the vocab and merges from the SPM * Always initialize and save the unified `GemmaTokenizer` class ## Before su\u2026", - "changed_files": 4, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, + "additions": 18, + "author": "gambletan", + "author_association": "NONE", + "body_excerpt": "## Summary Fixes #44514. `Qwen2_5_VLProcessor.apply_chat_template` crashes with `ValueError` when called with batched input and `padding=False` (the default). The root cause is `np.array(text_inputs[\"input_ids\"])` which fails when sequence\u2026", + "changed_files": 2, + "cluster_id": "cluster-44514-8", + "cluster_ids": [ + "cluster-44514-8" + ], + "cluster_role": "member", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44692", + "created_at": "2026-03-14T04:14:38Z", + "deletions": 10, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44692/files", + "html_url": "https://github.com/huggingface/transformers/pull/44692", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44692, + "review_comments_count": 0, + "state": "closed", + "title": "fix: handle ragged input_ids in Qwen2_5_VLProcessor.apply_chat_template", + "updated_at": "2026-03-18T12:44:18Z" + }, + { + "additions": 23, + "author": "gambletan", + "author_association": "NONE", + "body_excerpt": "## Summary - Fixes `num_labels` (and `id2label`/`label2id`) not being propagated from the outer `Qwen3_5Config` to its inner `text_config` when passed via `AutoConfig.from_pretrained(..., num_labels=1)`. - When `text_config` is `None` or a\u2026", + "changed_files": 2, + "cluster_id": "cluster-44625-9", + "cluster_ids": [ + "cluster-44625-9" + ], + "cluster_role": "member", "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44631", - "created_at": "2026-03-12T13:32:25Z", - "deletions": 45, + "conversation_url": "https://github.com/huggingface/transformers/pull/44691", + "created_at": "2026-03-14T04:10:54Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44631/files", - "html_url": "https://github.com/huggingface/transformers/pull/44631", - "labels": [], - "merged": true, - "number": 44631, + "files_url": "https://github.com/huggingface/transformers/pull/44691/files", + "html_url": "https://github.com/huggingface/transformers/pull/44691", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44691, + "review_comments_count": 0, + "state": "closed", + "title": "Fix Qwen3.5 num_labels not propagated to text_config", + "updated_at": "2026-03-18T12:57:19Z" + }, + { + "additions": 6, + "author": "gambletan", + "author_association": "NONE", + "body_excerpt": "## Summary Fixes #44360 The `GlmMoeDsaIndexer` is missing a ReLU activation on the per-head dot-product scores before the weighted sum across heads. The reference DeepSeek V3.2 implementation applies ReLU inside the `fp8_index` kernel: ```\u2026", + "changed_files": 2, + "cluster_id": "cluster-44360-6", + "cluster_ids": [ + "cluster-44360-6" + ], + "cluster_role": "member", + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44690", + "created_at": "2026-03-14T03:44:37Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44690/files", + "html_url": "https://github.com/huggingface/transformers/pull/44690", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44690, "review_comments_count": 0, "state": "closed", - "title": "[Gemma] Update conversion scripts for Transformers v5 Comaptibility", - "updated_at": "2026-03-18T10:39:53Z" + "title": "Fix missing ReLU in GLM-MOE-DSA indexer scoring", + "updated_at": "2026-03-18T12:40:23Z" }, { - "additions": 42, - "author": "MaybeSam05", + "additions": 141, + "author": "LincolnBurrows2017", "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "# What does this PR do? Fixes a bug where `num_labels` passed to `AutoConfig.from_pretrained` for Qwen3.5 did not propagate from the top\u2011level `Qwen3_5Config` into the `text_config`, so `AutoModelForSequenceClassification` still saw the de\u2026", - "changed_files": 2, + "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but text_config still has default `num_labels=2`. Thi\u2026", + "changed_files": 6, "cluster_id": "cluster-44625-9", "cluster_ids": [ "cluster-44625-9" ], "cluster_role": "member", "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44630", - "created_at": "2026-03-12T13:25:54Z", - "deletions": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44688", + "created_at": "2026-03-14T00:40:50Z", + "deletions": 23, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44630/files", - "html_url": "https://github.com/huggingface/transformers/pull/44630", + "files_url": "https://github.com/huggingface/transformers/pull/44688/files", + "html_url": "https://github.com/huggingface/transformers/pull/44688", "labels": [ "Code agent slop" ], "merged": false, - "number": 44630, + "number": 44688, "review_comments_count": 0, "state": "closed", - "title": "Fix Qwen3.5 num_labels propagation to text_config (fix #44625)", - "updated_at": "2026-03-12T13:46:07Z" - }, - { - "additions": 15, - "author": "zucchini-nlp", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Fixes \"auto\" dtype when the model is initialized `from_config` It was already fixed for `from_pretrained` in https://github.com/huggingface/transformers/pull/42990 but vLLM creates models with `AutoModel._from_confi\u2026", - "changed_files": 2, - "cluster_id": null, - "cluster_ids": [], - "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44629", - "created_at": "2026-03-12T13:07:55Z", - "deletions": 0, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44629/files", - "html_url": "https://github.com/huggingface/transformers/pull/44629", - "labels": [], - "merged": true, - "number": 44629, - "review_comments_count": 3, - "state": "closed", - "title": "Ensure same `dtype` for subconfig when `_from_config`", - "updated_at": "2026-03-13T11:35:10Z" + "title": "fix: Propagate num_labels to text_config in Qwen models", + "updated_at": "2026-03-18T12:56:41Z" }, { - "additions": 37, - "author": "ydshieh", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? - `encoder_config` and `decoder_config` should return `None` for encoder / decoder config classes themselves. - The encoder / decoder model classes should have the correct config classes associated to them modular doesn't properly convert some files (e.g. kyutai) Also fixes red CI on main", - "changed_files": 1, + "additions": 3, + "author": "tysoncung", + "author_association": "CONTRIBUTOR", + "body_excerpt": "Fix minor typos found in comments and docstrings: - `orignal` \u2192 `original` in `src/transformers/integrations/peft.py` (lines 245, 284) - Duplicate word `is is` \u2192 `is` in `src/transformers/models/dia/processing_dia.py` (line 89) Small clean\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44283", - "created_at": "2026-02-25T18:33:17Z", - "deletions": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44332", + "created_at": "2026-02-27T16:11:46Z", + "deletions": 3, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44283/files", - "html_url": "https://github.com/huggingface/transformers/pull/44283", + "files_url": "https://github.com/huggingface/transformers/pull/44332/files", + "html_url": "https://github.com/huggingface/transformers/pull/44332", "labels": [], "merged": true, - "number": 44283, + "number": 44332, "review_comments_count": 0, "state": "closed", - "title": "[`Modular`] Fix file type regression", - "updated_at": "2026-02-25T20:04:41Z" + "title": "Fix typos in comments and docstrings", + "updated_at": "2026-02-27T18:02:59Z" }, { - "additions": 5, - "author": "Rocketknight1", + "additions": 33, + "author": "kashif", "author_association": "MEMBER", - "body_excerpt": "Response schema save-loading was broken in #40936, this PR restores it! I did most of this in #42300 but missed an issue with loading/saving.", - "changed_files": 1, + "body_excerpt": "# What does this PR do? fixed the bfloat16 dtype mismatch and Loss computation shape mismatch. Also added tests for these. needs a test", - "changed_files": 36, + "additions": 3484, + "author": "jp1924", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? Hello, Transformers team! I submitted a PR to add naver-hyperclovax/HyperCLOVAX-SEED-Think-32B (hereafter HCX), developed by the Korean IT company Naver while executing the government's national AI model project. Th\u2026", + "changed_files": 24, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44264", - "created_at": "2026-02-24T18:06:58Z", - "deletions": 210, - "draft": true, - "files_url": "https://github.com/huggingface/transformers/pull/44264/files", - "html_url": "https://github.com/huggingface/transformers/pull/44264", + "comments_count": 22, + "conversation_url": "https://github.com/huggingface/transformers/pull/44314", + "created_at": "2026-02-27T02:01:28Z", + "deletions": 18, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44314/files", + "html_url": "https://github.com/huggingface/transformers/pull/44314", "labels": [], "merged": false, - "number": 44264, - "review_comments_count": 3, + "number": 44314, + "review_comments_count": 77, "state": "open", - "title": "[`Moe`] Enable aux loss automatically when in training + coef is not 0", - "updated_at": "2026-02-25T18:53:20Z" + "title": "add HyperClovaX Vision", + "updated_at": "2026-04-13T02:23:53Z" }, { - "additions": 5882, - "author": "SunMarc", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? This PR refactor the common tests that we have in Trainer. I've mainly did the following: - Split the tests that we have in `test_trainer.py` into multiple files. - Fix common tests that were failing in the CI", - "changed_files": 18, + "additions": 4, + "author": "jashshah999", + "author_association": "CONTRIBUTOR", + "body_excerpt": "Fixes the same `TypeError: AddedToken() got multiple values for keyword argument 'special'` that #44281 addressed, but for the `extra_special_tokens` code path which was missed. #44281 (commit 8e663c7) correctly added `value.pop(\"special\",\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44260", - "created_at": "2026-02-24T15:51:11Z", - "deletions": 6147, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44313", + "created_at": "2026-02-27T01:37:45Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44260/files", - "html_url": "https://github.com/huggingface/transformers/pull/44260", - "labels": [], - "merged": true, - "number": 44260, - "review_comments_count": 3, + "files_url": "https://github.com/huggingface/transformers/pull/44313/files", + "html_url": "https://github.com/huggingface/transformers/pull/44313", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44313, + "review_comments_count": 0, "state": "closed", - "title": "Update common tests Trainer", - "updated_at": "2026-02-27T17:31:59Z" + "title": "Fix AddedToken duplicate 'special' kwarg for extra_special_tokens", + "updated_at": "2026-02-27T14:26:28Z" }, { - "additions": 1830, - "author": "winglian", - "author_association": "COLLABORATOR", - "body_excerpt": "# What does this PR do? This PR supersedes #43985 to replace the dataset/sampler/dataloader with a data producer that should allow us to more easily get to the next step of async training for RL. modular doesn't properly convert some files (e.g. kyutai) Also fixes red CI on main", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44232", - "created_at": "2026-02-23T16:10:15Z", - "deletions": 109, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44283", + "created_at": "2026-02-25T18:33:17Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44232/files", - "html_url": "https://github.com/huggingface/transformers/pull/44232", + "files_url": "https://github.com/huggingface/transformers/pull/44283/files", + "html_url": "https://github.com/huggingface/transformers/pull/44283", "labels": [], "merged": true, - "number": 44232, - "review_comments_count": 2, + "number": 44283, + "review_comments_count": 0, "state": "closed", - "title": "chore: added CLAUDE.md alias", - "updated_at": "2026-02-24T14:48:36Z" + "title": "[`Modular`] Fix file type regression", + "updated_at": "2026-02-25T20:04:41Z" }, { - "additions": 413, - "author": "IlyasMoutawwakil", + "additions": 5, + "author": "Rocketknight1", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? needs a test", + "changed_files": 36, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44215", - "created_at": "2026-02-22T23:24:43Z", - "deletions": 13, - "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44215/files", - "html_url": "https://github.com/huggingface/transformers/pull/44215", + "conversation_url": "https://github.com/huggingface/transformers/pull/44264", + "created_at": "2026-02-24T18:06:58Z", + "deletions": 210, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/44264/files", + "html_url": "https://github.com/huggingface/transformers/pull/44264", "labels": [], "merged": false, - "number": 44215, - "review_comments_count": 0, + "number": 44264, + "review_comments_count": 3, "state": "open", - "title": "Add sequence classification capability to Granite models", - "updated_at": "2026-02-24T20:39:37Z" + "title": "[`Moe`] Enable aux loss automatically when in training + coef is not 0", + "updated_at": "2026-02-25T18:53:20Z" }, { - "additions": 70, - "author": "parthchopra07", - "author_association": "FIRST_TIMER", - "body_excerpt": "# What does this PR do? This PR refreshes the BEiT model documentation to align it with the current Transformers vision docs style and features. It updates the usage examples, clarifies configuration details, and improves the resources sec\u2026", - "changed_files": 1, + "additions": 5882, + "author": "SunMarc", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? This PR refactor the common tests that we have in Trainer. I've mainly did the following: - Split the tests that we have in `test_trainer.py` into multiple files. - Fix common tests that were failing in the CI", + "changed_files": 18, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/44213", - "created_at": "2026-02-22T18:32:16Z", - "deletions": 29, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44260", + "created_at": "2026-02-24T15:51:11Z", + "deletions": 6147, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44213/files", - "html_url": "https://github.com/huggingface/transformers/pull/44213", + "files_url": "https://github.com/huggingface/transformers/pull/44260/files", + "html_url": "https://github.com/huggingface/transformers/pull/44260", "labels": [], - "merged": false, - "number": 44213, - "review_comments_count": 0, + "merged": true, + "number": 44260, + "review_comments_count": 3, "state": "closed", - "title": "Update BEiT model card", - "updated_at": "2026-02-28T14:33:57Z" + "title": "Update common tests Trainer", + "updated_at": "2026-02-27T17:31:59Z" }, { - "additions": 1, - "author": "alexandercarruthers", - "author_association": "CONTRIBUTOR", - "body_excerpt": "# What does this PR do? Updates a broken link in the summarization guide. https://huggingface.co/docs/transformers/tasks/summarization https://huggingface.co/billsum/datasets results in a 404. New URL is https://huggingface.co/datasets/Fis\u2026", - "changed_files": 1, + "additions": 1830, + "author": "winglian", + "author_association": "COLLABORATOR", + "body_excerpt": "# What does this PR do? This PR supersedes #43985 to replace the dataset/sampler/dataloader with a data producer that should allow us to more easily get to the next step of async training for RL. \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026", - "changed_files": 3, + "additions": 379, + "author": "cogniera", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "What does this PR do? This PR refactors the LongT5 model to use the @capture_outputs and @can_return_tuple decorators for standardized output handling across the model stack. The refactor removes manual handling of: output_attentions outpu\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 5, - "conversation_url": "https://github.com/huggingface/transformers/pull/44127", - "created_at": "2026-02-18T10:41:48Z", - "deletions": 8, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44161", + "created_at": "2026-02-19T20:46:49Z", + "deletions": 170, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44127/files", - "html_url": "https://github.com/huggingface/transformers/pull/44127", + "files_url": "https://github.com/huggingface/transformers/pull/44161/files", + "html_url": "https://github.com/huggingface/transformers/pull/44161", "labels": [], - "merged": true, - "number": 44127, + "merged": false, + "number": 44161, "review_comments_count": 0, - "state": "closed", - "title": "AutoTokenizer ignores config when model_type is None", - "updated_at": "2026-02-18T14:47:52Z" + "state": "open", + "title": "Refactor LongT5 to use @capture_outputs and @can_return_tuple decorators for unified output handling (Fixes #43979)", + "updated_at": "2026-02-20T17:28:12Z" }, { - "additions": 17, - "author": "Cyrilvallez", + "additions": 2104, + "author": "molbap", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? As per the title. Let's simplify after https://github.com/huggingface/transformers/pull/42848", - "changed_files": 2, + "body_excerpt": "# What does this PR do? Hey yall, I started porting the pi0 model so Transformers can be a backend for VLAs directly. I checked it against LeRobot on fix/lerobot_openpi: outputs seem to match and for sure lerobot/pi0_base loads cleanly (no\u2026", + "changed_files": 22, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44126", - "created_at": "2026-02-18T09:58:49Z", - "deletions": 40, + "comments_count": 13, + "conversation_url": "https://github.com/huggingface/transformers/pull/44160", + "created_at": "2026-02-19T17:16:29Z", + "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44126/files", - "html_url": "https://github.com/huggingface/transformers/pull/44126", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44160/files", + "html_url": "https://github.com/huggingface/transformers/pull/44160", + "labels": [ + "New model" + ], "merged": true, - "number": 44126, - "review_comments_count": 0, + "number": 44160, + "review_comments_count": 32, "state": "closed", - "title": "Simplify input preparation in generate", - "updated_at": "2026-02-18T10:30:48Z" + "title": "Add model lerobot PI0 to transformers", + "updated_at": "2026-03-16T10:23:14Z" }, { - "additions": 8, - "author": "zucchini-nlp", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43986", - "changed_files": 1, + "additions": 67, + "author": "samuelleecong", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## Summary Closes #28103 - Refactor `OwlViTAttention` to use `ALL_ATTENTION_FUNCTIONS` for dynamic attention backend dispatch (same pattern as CLIP) - Add `eager_attention_forward` standalone function with the standardized interface - Resh\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44125", - "created_at": "2026-02-18T09:34:54Z", - "deletions": 7, + "conversation_url": "https://github.com/huggingface/transformers/pull/44159", + "created_at": "2026-02-19T16:31:44Z", + "deletions": 61, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44125/files", - "html_url": "https://github.com/huggingface/transformers/pull/44125", + "files_url": "https://github.com/huggingface/transformers/pull/44159/files", + "html_url": "https://github.com/huggingface/transformers/pull/44159", "labels": [], - "merged": true, - "number": 44125, - "review_comments_count": 2, - "state": "closed", - "title": "Raise informative error when loading video processors", - "updated_at": "2026-02-20T08:23:35Z" + "merged": false, + "number": 44159, + "review_comments_count": 0, + "state": "open", + "title": "Add SDPA and Flash Attention support for OWL-ViT", + "updated_at": "2026-02-24T12:53:10Z" }, { - "additions": 10, - "author": "mariam851", + "additions": 141, + "author": "leopold-tzafon", "author_association": "CONTRIBUTOR", - "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to force evaluation at the end of training, even if the last step doesn't align with eval_steps. Changes: training_args.py: Added eval_on_end field. trainer.py: Added logic to call evaluat\u2026", - "changed_files": 2, + "body_excerpt": "# Fix issue where `use_cache=False`, corrupts model Qwen3vl output. Tested with: ``` import torch from transformers import Qwen3VLForConditionalGeneration, AutoProcessor MODEL_NAME = \"Qwen/Qwen3-VL-4B-Instruct\" DEVICE = \"cuda\" DTYPE = torc\u2026", + "changed_files": 9, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/44124", - "created_at": "2026-02-18T08:52:23Z", - "deletions": 0, + "comments_count": 19, + "conversation_url": "https://github.com/huggingface/transformers/pull/44158", + "created_at": "2026-02-19T15:45:13Z", + "deletions": 36, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44124/files", - "html_url": "https://github.com/huggingface/transformers/pull/44124", + "files_url": "https://github.com/huggingface/transformers/pull/44158/files", + "html_url": "https://github.com/huggingface/transformers/pull/44158", "labels": [], - "merged": false, - "number": 44124, + "merged": true, + "number": 44158, "review_comments_count": 0, "state": "closed", - "title": "feat: add eval_on_end to Trainer for final evaluation", - "updated_at": "2026-02-18T14:14:16Z" + "title": "fix bug with position_ids on qwen3-vl models, such that position_ids include text position", + "updated_at": "2026-02-23T14:53:33Z" }, { - "additions": 33, - "author": "cyyever", - "author_association": "CONTRIBUTOR", - "body_excerpt": "# What does this PR do? This PR avoids device sync in training loss accumulation by ```torch.where```. The `is_torch_xla_available` condition is also removed.", - "changed_files": 1, + "additions": 689, + "author": "zucchini-nlp", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? As per title, gets rid of `if/else` per attn implementation", + "changed_files": 24, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44123", - "created_at": "2026-02-18T08:22:57Z", - "deletions": 22, + "comments_count": 8, + "conversation_url": "https://github.com/huggingface/transformers/pull/44157", + "created_at": "2026-02-19T14:49:49Z", + "deletions": 834, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44123/files", - "html_url": "https://github.com/huggingface/transformers/pull/44123", + "files_url": "https://github.com/huggingface/transformers/pull/44157/files", + "html_url": "https://github.com/huggingface/transformers/pull/44157", "labels": [], "merged": false, - "number": 44123, - "review_comments_count": 0, + "number": 44157, + "review_comments_count": 10, "state": "open", - "title": "Avoid device sync in training loss accumulation", - "updated_at": "2026-03-30T07:57:16Z" + "title": "Use correct mask for packed inputs in Qwen-VL ", + "updated_at": "2026-02-24T13:13:43Z" }, { - "additions": 158, - "author": "adityuhkapoor", - "author_association": "NONE", - "body_excerpt": "# What does this PR do? Adds 4-bit embedding quantization for BitsAndBytes, mirroring TorchAO's existing `include_input_output_embeddings` and `untie_embedding_weights` pattern (PRs #37802, #37905, #37935). Large-vocabulary models (Llama 3\u2026", - "changed_files": 4, + "additions": 2, + "author": "SunMarc", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? This PR fixes a minor error when using aqml quantization. We specified the wrong argument.", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44122", - "created_at": "2026-02-18T06:35:09Z", - "deletions": 2, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44156", + "created_at": "2026-02-19T14:35:38Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44122/files", - "html_url": "https://github.com/huggingface/transformers/pull/44122", - "labels": [ - "Code agent slop" - ], + "files_url": "https://github.com/huggingface/transformers/pull/44156/files", + "html_url": "https://github.com/huggingface/transformers/pull/44156", + "labels": [], "merged": false, - "number": 44122, + "number": 44156, "review_comments_count": 0, "state": "closed", - "title": "Add BnB 4-bit embedding quantization support", - "updated_at": "2026-02-18T14:27:25Z" + "title": "Fix aqml `modules_to_not_convert`", + "updated_at": "2026-03-27T16:50:02Z" }, { - "additions": 14, - "author": "tirth8205", - "author_association": "NONE", - "body_excerpt": "Fixes #34920 After applying `normalize()`, images can have negative values. Calling `resize()` on such images fails because it internally converts to PIL, which requires values in [0, 1] or [0, 255]. ### Fix When the image has values outsi\u2026", + "additions": 44, + "author": "Aatman09", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "# What does this PR do? \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026", + "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44090", - "created_at": "2026-02-17T13:15:07Z", - "deletions": 4, + "comments_count": 5, + "conversation_url": "https://github.com/huggingface/transformers/pull/44127", + "created_at": "2026-02-18T10:41:48Z", + "deletions": 8, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44090/files", - "html_url": "https://github.com/huggingface/transformers/pull/44090", + "files_url": "https://github.com/huggingface/transformers/pull/44127/files", + "html_url": "https://github.com/huggingface/transformers/pull/44127", "labels": [], "merged": true, - "number": 44090, + "number": 44127, "review_comments_count": 0, "state": "closed", - "title": "Update post proc", - "updated_at": "2026-02-18T15:34:18Z" + "title": "AutoTokenizer ignores config when model_type is None", + "updated_at": "2026-02-18T14:47:52Z" }, { - "additions": 113, - "author": "preetam1407", - "author_association": "CONTRIBUTOR", - "body_excerpt": "Fixes #43979 ## Summary Refactor T5 to the standardized output tracing interface. ## Changes - Added `_can_record_outputs` on T5 encoder/decoder stack subclasses. - Added `@capture_outputs` on the base stack forward. - Added `@can_return_t\u2026", + "additions": 17, + "author": "Cyrilvallez", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? As per the title. Let's simplify after https://github.com/huggingface/transformers/pull/42848", "changed_files": 2, - "cluster_id": "cluster-43979-21", - "cluster_ids": [ - "cluster-43979-21" - ], - "cluster_role": "member", + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44089", - "created_at": "2026-02-17T11:37:18Z", - "deletions": 294, + "conversation_url": "https://github.com/huggingface/transformers/pull/44126", + "created_at": "2026-02-18T09:58:49Z", + "deletions": 40, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44089/files", - "html_url": "https://github.com/huggingface/transformers/pull/44089", + "files_url": "https://github.com/huggingface/transformers/pull/44126/files", + "html_url": "https://github.com/huggingface/transformers/pull/44126", "labels": [], - "merged": false, - "number": 44089, + "merged": true, + "number": 44126, "review_comments_count": 0, "state": "closed", - "title": "Refactor t5 output tracing", - "updated_at": "2026-02-17T13:45:23Z" + "title": "Simplify input preparation in generate", + "updated_at": "2026-02-18T10:30:48Z" }, { - "additions": 41, - "author": "Zephyr-Blessed", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## Description Refactors GPT-2 model to use the standardized `@capture_outputs` and `@can_return_tuple` decorators, replacing manual output collection boilerplate. Part of #43979 ## Changes - **`GPT2PreTrainedModel`**: Added `_can_record_o\u2026", + "additions": 8, + "author": "zucchini-nlp", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43986", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44088", - "created_at": "2026-02-17T11:32:42Z", - "deletions": 129, + "conversation_url": "https://github.com/huggingface/transformers/pull/44125", + "created_at": "2026-02-18T09:34:54Z", + "deletions": 7, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44088/files", - "html_url": "https://github.com/huggingface/transformers/pull/44088", + "files_url": "https://github.com/huggingface/transformers/pull/44125/files", + "html_url": "https://github.com/huggingface/transformers/pull/44125", "labels": [], - "merged": false, - "number": 44088, - "review_comments_count": 0, + "merged": true, + "number": 44125, + "review_comments_count": 2, "state": "closed", - "title": "Refactor GPT-2 output tracing with capture_outputs/can_return_tuple", - "updated_at": "2026-02-17T11:41:32Z" + "title": "Raise informative error when loading video processors", + "updated_at": "2026-02-20T08:23:35Z" }, { - "additions": 16, - "author": "huyxdang", - "author_association": "NONE", - "body_excerpt": "### Summary Refactors the Mamba2 model to use the standardized output collection interface as part of #43979. ### Changes * **Standardized Output Mapping**: Added `_can_record_outputs` to `Mamba2PreTrainedModel` mapping `hidden_states` \u2192 `\u2026", + "additions": 10, + "author": "mariam851", + "author_association": "CONTRIBUTOR", + "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to force evaluation at the end of training, even if the last step doesn't align with eval_steps. Changes: training_args.py: Added eval_on_end field. trainer.py: Added logic to call evaluat\u2026", "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44087", - "created_at": "2026-02-17T11:30:25Z", - "deletions": 33, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44124", + "created_at": "2026-02-18T08:52:23Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44087/files", - "html_url": "https://github.com/huggingface/transformers/pull/44087", + "files_url": "https://github.com/huggingface/transformers/pull/44124/files", + "html_url": "https://github.com/huggingface/transformers/pull/44124", "labels": [], "merged": false, - "number": 44087, + "number": 44124, "review_comments_count": 0, "state": "closed", - "title": "Refactor Mamba2 to use standardized output tracing", - "updated_at": "2026-03-11T02:08:22Z" + "title": "feat: add eval_on_end to Trainer for final evaluation", + "updated_at": "2026-02-18T14:14:16Z" }, { - "additions": 16, - "author": "Zephyr-Blessed", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR do? Migrates **MGP-STR** to the standardized output collection interface using `@capture_outputs` and `@can_return_tuple` decorators. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": MgpstrLayer, \"attentio\u2026", + "additions": 33, + "author": "cyyever", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? This PR avoids device sync in training loss accumulation by ```torch.where```. The `is_torch_xla_available` condition is also removed.", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44086", - "created_at": "2026-02-17T11:21:22Z", - "deletions": 48, + "conversation_url": "https://github.com/huggingface/transformers/pull/44123", + "created_at": "2026-02-18T08:22:57Z", + "deletions": 22, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44086/files", - "html_url": "https://github.com/huggingface/transformers/pull/44086", + "files_url": "https://github.com/huggingface/transformers/pull/44123/files", + "html_url": "https://github.com/huggingface/transformers/pull/44123", "labels": [], "merged": false, - "number": 44086, + "number": 44123, "review_comments_count": 0, "state": "open", - "title": "[MGP-STR] Refactor output tracing to use capture_outputs/can_return_tuple decorators", - "updated_at": "2026-02-17T11:22:25Z" + "title": "Avoid device sync in training loss accumulation", + "updated_at": "2026-03-30T07:57:16Z" }, { - "additions": 37, - "author": "Zephyr-Blessed", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR do? Refactors the RemBERT model to use the new output tracing decorators (`@capture_outputs` and `@can_return_tuple`), replacing manual output collection boilerplate. ### Changes: - Added `@capture_outputs` decorator t\u2026", - "changed_files": 1, - "cluster_id": "cluster-43979-21", - "cluster_ids": [ - "cluster-43979-21" - ], - "cluster_role": "member", - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44085", - "created_at": "2026-02-17T11:09:55Z", - "deletions": 108, + "additions": 158, + "author": "adityuhkapoor", + "author_association": "NONE", + "body_excerpt": "# What does this PR do? Adds 4-bit embedding quantization for BitsAndBytes, mirroring TorchAO's existing `include_input_output_embeddings` and `untie_embedding_weights` pattern (PRs #37802, #37905, #37935). Large-vocabulary models (Llama 3\u2026", + "changed_files": 4, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44122", + "created_at": "2026-02-18T06:35:09Z", + "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44085/files", - "html_url": "https://github.com/huggingface/transformers/pull/44085", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44122/files", + "html_url": "https://github.com/huggingface/transformers/pull/44122", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44085, + "number": 44122, "review_comments_count": 0, - "state": "open", - "title": "Refactor RemBERT to use output tracing decorators", - "updated_at": "2026-02-17T11:10:59Z" + "state": "closed", + "title": "Add BnB 4-bit embedding quantization support", + "updated_at": "2026-02-18T14:27:25Z" }, { - "additions": 37, - "author": "Zephyr-Blessed", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR do? Refactors the GPT-J model to use the new `capture_outputs` and `can_return_tuple` decorators for output tracing, following the same pattern as #44046 (CodeGen). ### Changes: - Added `@capture_outputs` decorator on\u2026", + "additions": 14, + "author": "tirth8205", + "author_association": "NONE", + "body_excerpt": "Fixes #34920 After applying `normalize()`, images can have negative values. Calling `resize()` on such images fails because it internally converts to PIL, which requires values in [0, 1] or [0, 255]. ### Fix When the image has values outsi\u2026", "changed_files": 1, - "cluster_id": "cluster-43979-21", - "cluster_ids": [ - "cluster-43979-21" - ], - "cluster_role": "member", - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44084", - "created_at": "2026-02-17T11:08:48Z", - "deletions": 108, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44120", + "created_at": "2026-02-17T23:56:48Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44084/files", - "html_url": "https://github.com/huggingface/transformers/pull/44084", - "labels": [], + "files_url": "https://github.com/huggingface/transformers/pull/44120/files", + "html_url": "https://github.com/huggingface/transformers/pull/44120", + "labels": [ + "Code agent slop" + ], "merged": false, - "number": 44084, + "number": 44120, "review_comments_count": 0, "state": "closed", - "title": "[GPT-J] Refactor output tracing to use capture_outputs/can_return_tuple decorators", - "updated_at": "2026-02-17T11:41:38Z" + "title": "fix: allow image_transforms.resize to handle negative values after normalization", + "updated_at": "2026-02-18T14:08:54Z" }, { - "additions": 2857, - "author": "3outeille", - "author_association": "MEMBER", - "body_excerpt": "- TODO: - fsdp => faire comme tp en mode fsdp_plan manual qui devient l'auto par d\u00e9faut --- This PR introduces **first-class FSDP2 (Fully Sharded Data Parallel v2) support** directly in Transformers, bypassing the need for Accelerate's FSD\u2026", - "changed_files": 98, + "additions": 1, + "author": "tirth8205", + "author_association": "NONE", + "body_excerpt": "Fixes #44117 `TOKENIZER_MAPPING_NAMES.get(config_model_type, \"\")` returns `None` when the key exists with value `None`, causing `AttributeError: 'NoneType' object has no attribute 'replace'` when loading models like `google/siglip2-so400m-\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44083", - "created_at": "2026-02-17T10:57:06Z", - "deletions": 201, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44119", + "created_at": "2026-02-17T23:53:20Z", + "deletions": 1, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44083/files", - "html_url": "https://github.com/huggingface/transformers/pull/44083", + "files_url": "https://github.com/huggingface/transformers/pull/44119/files", + "html_url": "https://github.com/huggingface/transformers/pull/44119", "labels": [], "merged": false, - "number": 44083, - "review_comments_count": 24, - "state": "open", - "title": "FSDP2 native support in transformers ", - "updated_at": "2026-04-10T08:35:55Z" + "number": 44119, + "review_comments_count": 0, + "state": "closed", + "title": "fix: handle None value from TOKENIZER_MAPPING_NAMES.get() in AutoTokenizer", + "updated_at": "2026-02-18T14:04:47Z" }, { - "additions": 6, - "author": "Cyrilvallez", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44077. Indeed, the call is not optional. This is slightly breaking as the defaut used to be False, so fresh model instantiation will now use a different init\u2026", - "changed_files": 3, + "additions": 32, + "author": "tirth8205", + "author_association": "NONE", + "body_excerpt": "## Fix Fixes #44079 When a `ModelOutput` dataclass field is initialized as `None`, it is correctly excluded from the OrderedDict keys. However, **subsequently setting that field to a non-None value** via attribute assignment (e.g. `outputs\u2026", + "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44082", - "created_at": "2026-02-17T10:09:03Z", - "deletions": 20, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44118", + "created_at": "2026-02-17T23:31:31Z", + "deletions": 0, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44082/files", - "html_url": "https://github.com/huggingface/transformers/pull/44082", - "labels": [], - "merged": true, - "number": 44082, + "files_url": "https://github.com/huggingface/transformers/pull/44118/files", + "html_url": "https://github.com/huggingface/transformers/pull/44118", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44118, "review_comments_count": 0, "state": "closed", - "title": "Fix patchtsmixer call to post_init", - "updated_at": "2026-02-17T11:05:40Z" + "title": "fix: ModelOutput keys not updated when setting previously-None dataclass fields", + "updated_at": "2026-02-18T14:18:12Z" }, { - "additions": 48, - "author": "ArthurZucker", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? Fixes #42533 by introducing default flash implementations. cc @vasqu and @cyrilvallez", - "changed_files": 6, + "additions": 27, + "author": "dtiourine", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "Migrate Flaubert to the @capture_outputs and @can_return_tuple decorator pattern for output handling, as part of #43979. # What does this PR do? - Add `_can_record_outputs = {\"attentions\": MultiHeadAttention}` on `FlaubertPreTrainedModel`\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44081", - "created_at": "2026-02-17T09:54:01Z", - "deletions": 0, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44116", + "created_at": "2026-02-17T21:52:13Z", + "deletions": 102, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44081/files", - "html_url": "https://github.com/huggingface/transformers/pull/44081", + "files_url": "https://github.com/huggingface/transformers/pull/44116/files", + "html_url": "https://github.com/huggingface/transformers/pull/44116", "labels": [], - "merged": true, - "number": 44081, - "review_comments_count": 1, - "state": "closed", - "title": "add default flash impl", - "updated_at": "2026-02-19T11:29:54Z" + "merged": false, + "number": 44116, + "review_comments_count": 0, + "state": "open", + "title": "[WIP] [Flaubert] Refactor output tracing to decorator-based interface", + "updated_at": "2026-02-17T21:53:23Z" }, { - "additions": 22, - "author": "tomaarsen", + "additions": 2, + "author": "Deep-unlearning", "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? * Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None` Fixes #44079, follow-up from #44050. Essentially, it brings behaviour to the expected as described in #44079: > If I 1) initialize a\u2026", + "body_excerpt": "## Summary - Fix broken `[chat template](./chat_templating)` links in `docs/source/en/tasks/` - `./chat_templating` resolves within `tasks/` (doesn't exist); corrected to `../chat_templating` - Affected files: `tasks/image_text_to_text.md`\u2026", "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 4, - "conversation_url": "https://github.com/huggingface/transformers/pull/44080", - "created_at": "2026-02-17T09:53:36Z", - "deletions": 8, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44115", + "created_at": "2026-02-17T21:32:55Z", + "deletions": 2, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44080/files", - "html_url": "https://github.com/huggingface/transformers/pull/44080", + "files_url": "https://github.com/huggingface/transformers/pull/44115/files", + "html_url": "https://github.com/huggingface/transformers/pull/44115", "labels": [], "merged": true, - "number": 44080, + "number": 44115, "review_comments_count": 0, "state": "closed", - "title": "Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None`", - "updated_at": "2026-02-20T10:08:38Z" + "title": "[docs] fix broken chat_templating links in tasks docs", + "updated_at": "2026-02-23T16:27:57Z" }, { - "additions": 19, - "author": "tomaarsen", - "author_association": "MEMBER", - "body_excerpt": "# What does this PR do? * Set `input_modalities` on various architectures that aren't just text Sentence Transformers would like to rely on `input_modalities` in the future to determine what modalities can be used. However, it's not quite\u2026", - "changed_files": 10, + "additions": 716, + "author": "23atharvaS", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## Summary This PR migrates the `wav2vec2` family to the standardized output-capturing interface (`@capture_outputs` + `@can_return_tuple`) and includes follow-up compatibility fixes required to make full CI green. ## What changed ### Core\u2026", + "changed_files": 19, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 3, - "conversation_url": "https://github.com/huggingface/transformers/pull/44078", - "created_at": "2026-02-17T09:15:34Z", - "deletions": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44114", + "created_at": "2026-02-17T21:17:35Z", + "deletions": 1237, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44078/files", - "html_url": "https://github.com/huggingface/transformers/pull/44078", + "files_url": "https://github.com/huggingface/transformers/pull/44114/files", + "html_url": "https://github.com/huggingface/transformers/pull/44114", "labels": [], - "merged": true, - "number": 44078, - "review_comments_count": 6, - "state": "closed", - "title": "[`fix`] Set input_modalities on various architectures that aren't just text", - "updated_at": "2026-02-24T10:39:31Z" + "merged": false, + "number": 44114, + "review_comments_count": 0, + "state": "open", + "title": "Migrate wav2vec2, wav2vec2_conformer, and wav2vec2_bert to standardized output collection decorators", + "updated_at": "2026-02-18T20:34:53Z" }, { - "additions": 11, - "author": "mmahjoub5", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "# What does this PR do? This PR refactors the ImageGPT implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026", + "additions": 5, + "author": "harshaljanjani", + "author_association": "CONTRIBUTOR", + "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Updates the stale `test_device_override` in `test_processing_granite_speech.py` to verify that the device param controls where speech inputs are placed, r\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44076", - "created_at": "2026-02-17T08:46:55Z", - "deletions": 62, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44113", + "created_at": "2026-02-17T20:01:32Z", + "deletions": 7, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44076/files", - "html_url": "https://github.com/huggingface/transformers/pull/44076", + "files_url": "https://github.com/huggingface/transformers/pull/44113/files", + "html_url": "https://github.com/huggingface/transformers/pull/44113", "labels": [], - "merged": false, - "number": 44076, - "review_comments_count": 0, - "state": "open", - "title": "Refectored modeling_imagegpt.py to enable hooks to capture_outputs", - "updated_at": "2026-02-18T04:11:40Z" + "merged": true, + "number": 44113, + "review_comments_count": 2, + "state": "closed", + "title": "fix(testing): Update stale device override test in GraniteSpeech", + "updated_at": "2026-02-19T11:24:29Z" }, { - "additions": 66, - "author": "aman-coder03", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR do? This PR migrates TextNet to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. It adds `_can_record_outputs`, applies `@capture_outputs` to `TextNetModel.for\u2026", - "changed_files": 2, + "additions": 30, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `PoolFormerLayer` to return a single tensor instead of a 1-tuple - Simplifies `\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44074", - "created_at": "2026-02-17T08:23:25Z", - "deletions": 52, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/44111", + "created_at": "2026-02-17T19:38:02Z", + "deletions": 59, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44074/files", - "html_url": "https://github.com/huggingface/transformers/pull/44074", + "files_url": "https://github.com/huggingface/transformers/pull/44111/files", + "html_url": "https://github.com/huggingface/transformers/pull/44111", "labels": [], "merged": false, - "number": 44074, + "number": 44111, "review_comments_count": 0, - "state": "open", - "title": "[TextNet] Refactor output tracing using capture_outputs decorator", - "updated_at": "2026-02-17T11:28:11Z" + "state": "closed", + "title": "refactor(poolformer): use capture_outputs for output tracing", + "updated_at": "2026-02-18T21:19:22Z" }, { - "additions": 32, - "author": "aman-coder03", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR do? This PR migrates VisualBert to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. Specifically, this PR: - Adds `_can_record_outputs` to `VisualBertPreTraine\u2026", + "additions": 28, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `TvpAttention` to always return `(output, attention_probs)` (hooks decide what to capt\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44073", - "created_at": "2026-02-17T08:16:59Z", - "deletions": 38, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44110", + "created_at": "2026-02-17T19:32:55Z", + "deletions": 101, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44073/files", - "html_url": "https://github.com/huggingface/transformers/pull/44073", + "files_url": "https://github.com/huggingface/transformers/pull/44110/files", + "html_url": "https://github.com/huggingface/transformers/pull/44110", "labels": [], "merged": false, - "number": 44073, + "number": 44110, "review_comments_count": 0, - "state": "open", - "title": "[VisualBert] Refactor output tracing using capture_outputs and can_return_tuple decorators", - "updated_at": "2026-02-17T11:29:01Z" + "state": "closed", + "title": "refactor(tvp): use capture_outputs for output tracing", + "updated_at": "2026-02-18T21:19:24Z" }, { - "additions": 12, - "author": "Siddhartha7340", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "# Refactor efficientnet output tracing # What does this PR do? This Pull Request migrates the EfficientNet model to use the standardized @capture_outputs and @can_return_tuple decorators. - Added _can_record_outputs to `EfficientNetPreTrai\u2026", - "changed_files": 1, - "cluster_id": "cluster-43979-21", - "cluster_ids": [ - "cluster-43979-21" - ], - "cluster_role": "member", - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44072", - "created_at": "2026-02-17T07:42:01Z", - "deletions": 38, + "additions": 48, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `HGNetV2Encoder` by removing `return_dict` parameter (always returns `BaseModelOutputWithNoAttention`)\u2026", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44109", + "created_at": "2026-02-17T19:23:03Z", + "deletions": 87, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44072/files", - "html_url": "https://github.com/huggingface/transformers/pull/44072", + "files_url": "https://github.com/huggingface/transformers/pull/44109/files", + "html_url": "https://github.com/huggingface/transformers/pull/44109", "labels": [], "merged": false, - "number": 44072, + "number": 44109, "review_comments_count": 0, - "state": "open", - "title": "refactor efficientnet output tracing with @capture_outputs and @can_r\u2026", - "updated_at": "2026-02-17T07:56:05Z" + "state": "closed", + "title": "refactor(hgnet_v2): use capture_outputs for output tracing", + "updated_at": "2026-02-18T21:19:25Z" }, { - "additions": 38, - "author": "ArivunidhiA", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR do? Refactors the MPT model to use the new standardized output collection interface as part of #43979. ### Changes: - Added `_can_record_outputs` to `MptPreTrainedModel` mapping `hidden_states` \u2192 `MptBlock` and `attent\u2026", + "additions": 33, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_attentions`/`return_dict` resolution - Adds `_can_record_outputs = {\"attentions\": VitDetAttention}`\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44071", - "created_at": "2026-02-17T07:19:17Z", - "deletions": 112, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44108", + "created_at": "2026-02-17T19:15:00Z", + "deletions": 82, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44071/files", - "html_url": "https://github.com/huggingface/transformers/pull/44071", + "files_url": "https://github.com/huggingface/transformers/pull/44108/files", + "html_url": "https://github.com/huggingface/transformers/pull/44108", "labels": [], "merged": false, - "number": 44071, + "number": 44108, "review_comments_count": 0, - "state": "open", - "title": "[Refactor] Migrate MPT to standardized output tracing decorators", - "updated_at": "2026-02-17T07:20:17Z" + "state": "closed", + "title": "refactor(vitdet): use output tracing decorators", + "updated_at": "2026-02-18T21:19:27Z" }, { - "additions": 272, - "author": "rudybear", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## Summary - Add GGUF config mapping, defaults, and tokenizer converter for `qwen3_next` (Qwen3-Coder-Next, hybrid DeltaNet+Attention MoE, 80B total / 3B active) - Add `Qwen3NextTensorProcessor` handling DeltaNet-specific tensor transforms\u2026", - "changed_files": 3, + "additions": 40, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture_outputs` decorators - Simplifies `MraEncoder` to a plain loop returning a single tensor, removing `\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 1, - "conversation_url": "https://github.com/huggingface/transformers/pull/44070", - "created_at": "2026-02-17T07:18:13Z", - "deletions": 0, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44107", + "created_at": "2026-02-17T19:04:42Z", + "deletions": 112, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44070/files", - "html_url": "https://github.com/huggingface/transformers/pull/44070", + "files_url": "https://github.com/huggingface/transformers/pull/44107/files", + "html_url": "https://github.com/huggingface/transformers/pull/44107", "labels": [], "merged": false, - "number": 44070, + "number": 44107, "review_comments_count": 0, - "state": "open", - "title": "Add GGUF loading support for Qwen3-Next (qwen3_next) architecture", - "updated_at": "2026-02-17T07:21:26Z" + "state": "closed", + "title": "refactor(mra): use output tracing decorators", + "updated_at": "2026-02-18T21:19:29Z" }, { - "additions": 26, - "author": "cyyever", - "author_association": "CONTRIBUTOR", - "body_excerpt": "# What does this PR do? This PR uses ``torch.isfinite`` to simplify conditions, and the CUDA sync calls may also be reduced.", - "changed_files": 26, + "additions": 47, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 5 wrapper model classes, eliminating manual `return_dict` handlin\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44069", - "created_at": "2026-02-17T06:49:38Z", - "deletions": 48, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44106", + "created_at": "2026-02-17T18:59:25Z", + "deletions": 132, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44069/files", - "html_url": "https://github.com/huggingface/transformers/pull/44069", + "files_url": "https://github.com/huggingface/transformers/pull/44106/files", + "html_url": "https://github.com/huggingface/transformers/pull/44106", "labels": [], - "merged": true, - "number": 44069, + "merged": false, + "number": 44106, "review_comments_count": 0, "state": "closed", - "title": "Use torch.isfinite", - "updated_at": "2026-02-18T01:04:19Z" + "title": "Refactor yoso to use automatic output tracing", + "updated_at": "2026-02-18T21:19:30Z" }, { - "additions": 42, - "author": "mtthw13", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Replaces manual `output_attentions`/`output_hidden_states`/`return_dict` boilerplate in GPT-Neo with the hook-based decorator system. **Changes:** - Added `_can_record_outputs = {\"hidden_states\": GPTNeoBlock, \"attentions\": GPTNeoAttention}\u2026", - "changed_files": 2, + "additions": 39, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 3 wrapper model classes, eliminating manual `return_dict` handlin\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44068", - "created_at": "2026-02-17T06:13:37Z", - "deletions": 119, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44105", + "created_at": "2026-02-17T18:54:40Z", + "deletions": 127, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44068/files", - "html_url": "https://github.com/huggingface/transformers/pull/44068", + "files_url": "https://github.com/huggingface/transformers/pull/44105/files", + "html_url": "https://github.com/huggingface/transformers/pull/44105", "labels": [], "merged": false, - "number": 44068, + "number": 44105, "review_comments_count": 0, - "state": "open", - "title": "Refactor GPT-Neo to use `@capture_outputs` and `@can_return_tuple` decorators", - "updated_at": "2026-02-18T08:30:32Z" + "state": "closed", + "title": "Refactor lilt to use automatic output tracing", + "updated_at": "2026-02-18T21:19:32Z" }, { - "additions": 63, - "author": "23atharvaS", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "## What does this PR do? This PR introduces a new argument `eval_on_end` to the `Trainer` class. When enabled, the Trainer automatically runs evaluation at the end of training. This allows users to obtain final evaluation metrics without e\u2026", - "changed_files": 3, + "additions": 66, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 8 wrapper model classes, eliminating m\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/44067", - "created_at": "2026-02-17T05:25:26Z", - "deletions": 1, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44104", + "created_at": "2026-02-17T18:43:44Z", + "deletions": 207, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44067/files", - "html_url": "https://github.com/huggingface/transformers/pull/44067", - "labels": [ - "Code agent slop" - ], + "files_url": "https://github.com/huggingface/transformers/pull/44104/files", + "html_url": "https://github.com/huggingface/transformers/pull/44104", + "labels": [], "merged": false, - "number": 44067, + "number": 44104, "review_comments_count": 0, "state": "closed", - "title": "Add `eval_on_end` argument to Trainer for final evaluation after training", - "updated_at": "2026-02-17T13:32:34Z" + "title": "Refactor megatron_bert to use automatic output tracing", + "updated_at": "2026-02-18T21:19:34Z" }, { - "additions": 35, - "author": "Jay-IIT", - "author_association": "FIRST_TIME_CONTRIBUTOR", - "body_excerpt": "Migrate GPT-J from manual boilerplate output collection to the new decorator-based output tracing system: - Add `_can_record_outputs` to `GPTJPreTrainedModel` - Add `@capture_outputs` and `@merge_with_config_defaults` to `GPTJModel.forward\u2026", - "changed_files": 2, - "cluster_id": "cluster-43979-21", - "cluster_ids": [ - "cluster-43979-21" - ], - "cluster_role": "member", - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44066", - "created_at": "2026-02-17T05:12:11Z", - "deletions": 107, + "additions": 53, + "author": "engmohamedsalah", + "author_association": "NONE", + "body_excerpt": "Fixes #44052 Now and then, the indexer ran into trouble switching between masks and cache. Most of the test failures came from these hiccups: - Indexer cache: the old if seq_len > 1: reset cache heuristic broke assisted decoding (multi-tok\u2026", + "changed_files": 3, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44103", + "created_at": "2026-02-17T18:04:48Z", + "deletions": 76, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44066/files", - "html_url": "https://github.com/huggingface/transformers/pull/44066", + "files_url": "https://github.com/huggingface/transformers/pull/44103/files", + "html_url": "https://github.com/huggingface/transformers/pull/44103", "labels": [], "merged": false, - "number": 44066, + "number": 44103, "review_comments_count": 0, - "state": "open", - "title": "Refactor GPT-J to use standardized output tracing (#43979)", - "updated_at": "2026-02-18T18:44:28Z" + "state": "closed", + "title": "Fix glm_moe_dsa", + "updated_at": "2026-02-18T19:38:11Z" }, - { - "additions": 21, - "author": "tysoncung", - "author_association": "CONTRIBUTOR", - "body_excerpt": "## Summary Refactors the CTRL model to use the standardized output collection interface as part of #43979. ## Changes - Added `_can_record_outputs` to `CTRLPreTrainedModel` mapping `hidden_states` \u2192 `EncoderLayer` and `attentions` \u2192 `Multi\u2026", + { + "additions": 42, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of the meta-issue #43979. **Key changes:** - Added `_can_record_outputs = {\"hidden_states\": IBertLayer,\u2026", "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 2, - "conversation_url": "https://github.com/huggingface/transformers/pull/44065", - "created_at": "2026-02-17T02:03:57Z", - "deletions": 76, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44102", + "created_at": "2026-02-17T17:21:32Z", + "deletions": 154, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44065/files", - "html_url": "https://github.com/huggingface/transformers/pull/44065", + "files_url": "https://github.com/huggingface/transformers/pull/44102/files", + "html_url": "https://github.com/huggingface/transformers/pull/44102", "labels": [], "merged": false, - "number": 44065, + "number": 44102, "review_comments_count": 0, "state": "closed", - "title": "Refactor CTRL model output tracing with capture_outputs and can_return_tuple", - "updated_at": "2026-02-25T00:49:18Z" + "title": "Refactor ibert output tracing with capture_outputs", + "updated_at": "2026-02-18T21:19:35Z" }, { - "additions": 57, - "author": "mariam851", - "author_association": "CONTRIBUTOR", - "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to trigger a final evaluation automatically after training finishes. Key Changes: TrainingArguments: Added eval_on_end boolean flag. Trainer.train: Logic to call evaluate() and merge metri\u2026", + "additions": 210, + "author": "aman-coder03", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## What does this PR do? This PR refactors XLM's output tracing to align with the standardized output capturing patterns used across the codebase. ### Key changes: - Refactors transformer blocks into a dedicated `XLMLayer` module to enable\u2026", "changed_files": 2, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 0, - "conversation_url": "https://github.com/huggingface/transformers/pull/44064", - "created_at": "2026-02-17T01:10:31Z", - "deletions": 16, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44101", + "created_at": "2026-02-17T17:15:06Z", + "deletions": 194, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44064/files", - "html_url": "https://github.com/huggingface/transformers/pull/44064", - "labels": [ - "Code agent slop" - ], + "files_url": "https://github.com/huggingface/transformers/pull/44101/files", + "html_url": "https://github.com/huggingface/transformers/pull/44101", + "labels": [], "merged": false, - "number": 44064, + "number": 44101, "review_comments_count": 0, - "state": "closed", - "title": "feat: implement eval_on_end to trigger evaluation after training", - "updated_at": "2026-02-17T13:32:40Z" + "state": "open", + "title": "[XLM] Refactor output tracing to align with capture_outputs standardized architecture", + "updated_at": "2026-02-19T08:08:33Z" }, { - "additions": 229, - "author": "AutumnAurelium", - "author_association": "CONTRIBUTOR", - "body_excerpt": "# What does this PR do? This brings the Arcee AFMoE architecture in line with other MoE models' implementation patterns since v5. It also adds integration testing using Trinity Nano. ## Before submitting - [ ] This PR fixes a typo or impro\u2026", - "changed_files": 5, + "additions": 3, + "author": "qgallouedec", + "author_association": "MEMBER", + "body_excerpt": "In https://github.com/huggingface/trl/pull/5112 a user reported that `trl sft --help` fails It's because three inherited args from `TrainingArguments` (`torch_empty_cache_steps`, `gradient_checkpointing` and `use_liger_kernel`)help strings\u2026", + "changed_files": 1, "cluster_id": null, "cluster_ids": [], "cluster_role": null, - "comments_count": 5, - "conversation_url": "https://github.com/huggingface/transformers/pull/44063", - "created_at": "2026-02-17T01:07:13Z", - "deletions": 150, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44100", + "created_at": "2026-02-17T17:10:36Z", + "deletions": 3, "draft": false, - "files_url": "https://github.com/huggingface/transformers/pull/44063/files", - "html_url": "https://github.com/huggingface/transformers/pull/44063", + "files_url": "https://github.com/huggingface/transformers/pull/44100/files", + "html_url": "https://github.com/huggingface/transformers/pull/44100", "labels": [], "merged": true, - "number": 44063, - "review_comments_count": 6, + "number": 44100, + "review_comments_count": 0, "state": "closed", - "title": "Update AFMoE architecture to use v5-style MoE impl", - "updated_at": "2026-03-19T14:00:46Z" + "title": "Fix percentage formatting in help messages for gradient checkpointing, Liger Kernel, and empty cache steps", + "updated_at": "2026-02-20T09:57:51Z" }, { "additions": 2, - "author": "tarekziade", + "author": "qgallouedec", "author_association": "MEMBER", - "body_excerpt": "Reproduced locally with ``` pytest -q -m generate --random-order-bucket=none --flake-finder --flake-runs=200 tests/models/kosmos2/test_modeling_kosmos2.py -k test_assisted_decoding_matches_greedy_search ``` Root cause: - prepare_config_and\u2026", - "changed_files": 1, + "body_excerpt": "# What does this PR do?