diff --git "a/data/prs.json" "b/data/prs.json"
--- "a/data/prs.json"
+++ "b/data/prs.json"
@@ -1,27634 +1,27466 @@
 [
   {
-    "additions": 71,
-    "author": "zucchini-nlp",
+    "additions": 115,
+    "author": "IlyasMoutawwakil",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Credits to @xenova , adds an example code since it is more complicated that text LLMs. Beginner users might not know and an example is pretty much useful for them",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45398",
-    "created_at": "2026-04-13T09:28:34Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45398/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45398",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45433",
+    "created_at": "2026-04-14T14:33:39Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45433/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45433",
     "labels": [],
     "merged": false,
-    "number": 45398,
+    "number": 45433,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add example for iterative chatting with MLLMs",
-    "updated_at": "2026-04-13T09:38:27Z"
+    "title": "SonicMoe",
+    "updated_at": "2026-04-14T14:47:50Z"
   },
   {
-    "additions": 1754,
-    "author": "IlyasMoutawwakil",
+    "additions": 293,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "- Create top-level `modeling_vision_utils.py` with shared pure functions: `get_vision_cu_seqlens`, `get_rotary_pos_ids`, `get_rotary_pos_ids_interleaved`, `get_window_index`, `get_pos_embed_indices` - Move audio precompute functions (`chun\u2026",
-    "changed_files": 34,
+    "body_excerpt": "# What does this PR do? This patch adds the pipeline in our type checker, and also adds mccabe complexity in our Ruff linter with a 75 threshold for now. To reduce complexity, logical blocks are now splitted into private functions to impro\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45396",
-    "created_at": "2026-04-13T08:46:10Z",
-    "deletions": 1918,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45396/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45396",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45432",
+    "created_at": "2026-04-14T14:04:07Z",
+    "deletions": 192,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45432/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45432",
     "labels": [],
     "merged": false,
-    "number": 45396,
+    "number": 45432,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Extract dynamic vision/audio tensors into standalone pure functions",
-    "updated_at": "2026-04-13T09:43:50Z"
+    "title": "chore(qa): split pipeline and add type checking",
+    "updated_at": "2026-04-14T14:14:13Z"
   },
   {
-    "additions": 9,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary Fixes #45137. Since #41147, attention layers are decorated with `@use_kernelized_func(apply_rotary_pos_emb)` which attaches a `rotary_fn` child `nn.Module` at init when the `kernels` library is available. DeepSpeed ZeRO-3's para\u2026",
+    "additions": 3,
+    "author": "ambroiseodt",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fix checkpoint path in Dinov2 model_docs. # What does this PR do? Fixes the wrong checkpoint path in Dinov2 model_docs #45431 Changes made: the previous checkpoint (google/dinov2-base-patch16-224) does not exist and has been changed to the\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45395",
-    "created_at": "2026-04-13T08:35:06Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45430",
+    "created_at": "2026-04-14T13:45:51Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45395/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45395",
+    "files_url": "https://github.com/huggingface/transformers/pull/45430/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45430",
     "labels": [],
     "merged": false,
-    "number": 45395,
+    "number": 45430,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix IndexError with DeepSpeed ZeRO-3 when kernels rotary is active",
-    "updated_at": "2026-04-13T09:07:04Z"
+    "title": "[Doc] Correct checkpoint path in Dinov2 model_docs ",
+    "updated_at": "2026-04-14T13:55:07Z"
   },
   {
-    "additions": 33,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
-    "changed_files": 4,
+    "additions": 5,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Improve workflow file",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45394",
-    "created_at": "2026-04-13T08:28:21Z",
-    "deletions": 2,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45394/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45394",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45429",
+    "created_at": "2026-04-14T09:44:36Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45429/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45429",
     "labels": [],
     "merged": false,
-    "number": 45394,
+    "number": 45429,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix(x_clip): auto-fix failing tests",
-    "updated_at": "2026-04-13T08:29:21Z"
+    "state": "closed",
+    "title": "Improve workflow file",
+    "updated_at": "2026-04-14T09:49:38Z"
   },
   {
-    "additions": 0,
-    "author": "tarekziade",
+    "additions": 75,
+    "author": "tomaarsen",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Remove accidental addition of an ignored file",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45392",
-    "created_at": "2026-04-13T07:14:44Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45428",
+    "created_at": "2026-04-14T09:39:32Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45392/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45392",
+    "files_url": "https://github.com/huggingface/transformers/pull/45428/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45428",
     "labels": [],
     "merged": true,
-    "number": 45392,
+    "number": 45428,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "remove cache file from tree",
-    "updated_at": "2026-04-13T08:24:27Z"
+    "title": "[`fix`] PEFT integration fixes preventing save/load & integration",
+    "updated_at": "2026-04-14T11:27:26Z"
   },
   {
-    "additions": 461,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Similarly to the VLM tester, this patch introduces a audio tester class, used in - Qwen2Audio - AudioFlamingo3 - GraniteSpeech Adding a new audio-language model using this will require ~8-20 lines for the tester (vs\u2026",
-    "changed_files": 5,
+    "additions": 1,
+    "author": "Aftabbs",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #45341. `get_device_properties()` in `testing_utils.py` calls `torch.cuda.get_device_capability()` whenever `IS_CUDA_SYSTEM or IS_ROCM_SYSTEM` is `True`. This raises a `RuntimeError` on environments where CUD\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45391",
-    "created_at": "2026-04-13T06:32:49Z",
-    "deletions": 471,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45427",
+    "created_at": "2026-04-14T08:56:45Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45391/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45391",
+    "files_url": "https://github.com/huggingface/transformers/pull/45427/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45427",
     "labels": [],
     "merged": false,
-    "number": 45391,
+    "number": 45427,
     "review_comments_count": 0,
     "state": "open",
-    "title": "audio tester class",
-    "updated_at": "2026-04-13T07:32:46Z"
+    "title": "fix(testing_utils): guard get_device_capability() with torch.cuda.is_available()",
+    "updated_at": "2026-04-14T08:56:45Z"
   },
   {
-    "additions": 32,
-    "author": "ruben-aghayan",
+    "additions": 1333,
+    "author": "kmswin1",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR fails loudly when using repetition penalty on input_embed without input_ids args and requires the user to pass in input_ids as well. Previously, users were able to call repetition penalty on generate calls w\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Add A.X K1 model architecture What does this PR do? This PR adds support for A.X K1, a large-scale Mixture-of-Experts (MoE) language model developed by [SK Telecom](https://huggingface.co/skt). A.X K1 contains 519B total parameters with 33\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45389",
-    "created_at": "2026-04-13T03:57:04Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45426",
+    "created_at": "2026-04-14T07:23:05Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45389/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45389",
+    "files_url": "https://github.com/huggingface/transformers/pull/45426/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45426",
     "labels": [],
     "merged": false,
-    "number": 45389,
+    "number": 45426,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Require input_ids for repetition penalty",
-    "updated_at": "2026-04-13T06:06:35Z"
+    "title": "Feature/add axk1",
+    "updated_at": "2026-04-14T07:38:58Z"
   },
   {
-    "additions": 65,
-    "author": "albertorkive",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Makes `Gemma4ClippableLinear` inherit from `nn.Linear` instead of wrapping one via composition, enabling PEFT/LoRA to discover and target vision/audio encoder layers. **Problem:** PEFT's LoRA module discovery uses `\u2026",
-    "changed_files": 3,
+    "additions": 129,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds type checking to `modeling_utils.py`",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45388",
-    "created_at": "2026-04-12T17:02:39Z",
-    "deletions": 31,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45425",
+    "created_at": "2026-04-14T06:56:00Z",
+    "deletions": 69,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45388/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45388",
+    "files_url": "https://github.com/huggingface/transformers/pull/45425/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45425",
     "labels": [],
     "merged": false,
-    "number": 45388,
+    "number": 45425,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Make Gemma4ClippableLinear inherit from nn.Linear for PEFT/LoRA compatibility",
-    "updated_at": "2026-04-12T17:03:41Z"
+    "title": "chore(typing): added modeling_utils to ty",
+    "updated_at": "2026-04-14T13:21:45Z"
   },
   {
-    "additions": 35,
-    "author": "albertorkive",
+    "additions": 203,
+    "author": "louzongzhi",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes `attn_implementation=\"flash_attention_3\"` which is currently broken for the most common FA3 install method \u2014 the hopper wheel built from `flash-attention/hopper/`. **Three issues fixed:** 1. **`is_flash_attn_3\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? This PR implements **IndexCache** support for GLM5's DeepSeek Sparse Attention (DSA), enabling cross-layer index reuse to accelerate long-context inference. IndexCache accelerates sparse attention by reusing top-k t\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45387",
-    "created_at": "2026-04-12T17:02:14Z",
-    "deletions": 15,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45424",
+    "created_at": "2026-04-14T05:14:08Z",
+    "deletions": 46,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45387/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45387",
+    "files_url": "https://github.com/huggingface/transformers/pull/45424/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45424",
     "labels": [],
     "merged": false,
-    "number": 45387,
-    "review_comments_count": 0,
+    "number": 45424,
+    "review_comments_count": 9,
     "state": "open",
-    "title": "Fix flash_attention_3 detection and import for hopper wheel installs",
-    "updated_at": "2026-04-12T17:32:42Z"
+    "title": "Add IndexCache support for GLM5 DSA",
+    "updated_at": "2026-04-14T14:36:24Z"
   },
   {
-    "additions": 24,
-    "author": "UsamaKenway",
+    "additions": 68,
+    "author": "Yanis01682",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Optimizes memory usage when loading GGUF models by performing dtype casting immediately after dequantization. While I was adding the support for Gemma4 in this PR #45296, i noticed this issue that the GGUF tensors are dequantized to `float\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Summary - preserve existing void/ignore label 255 when educe_label is applied - only reduce non-ignore class ids by 1 for both torch and PIL image processors - add a SegFormer regression test covering",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45386",
-    "created_at": "2026-04-12T13:17:17Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45423",
+    "created_at": "2026-04-14T01:47:54Z",
+    "deletions": 29,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45386/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45386",
+    "files_url": "https://github.com/huggingface/transformers/pull/45423/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45423",
     "labels": [],
     "merged": false,
-    "number": 45386,
+    "number": 45423,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[GGUF] Reduce peak RAM usage by casting dequantized tensors early during load",
-    "updated_at": "2026-04-12T15:18:56Z"
+    "title": "Fix void segmentation map label reduction",
+    "updated_at": "2026-04-14T06:05:14Z"
   },
   {
-    "additions": 15,
-    "author": "songyuc",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Loading `openai/clip-vit-base-patch32` currently reports the following keys as unexpected: - `text_model.embeddings.position_ids` - `vision_model.embeddings.position_ids` In the current CLIP implementation, these b\u2026",
-    "changed_files": 2,
+    "additions": 58,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "In `apply_chat_template`, drop the `content` key from messages when its value is `None` before passing to the Jinja template. ## Why this is a bug fix, not a breaking change `content=None` means \"there is no content\", it is semantically id\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45385",
-    "created_at": "2026-04-12T12:53:49Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45422",
+    "created_at": "2026-04-14T00:53:00Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45385/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45385",
+    "files_url": "https://github.com/huggingface/transformers/pull/45422/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45422",
     "labels": [],
     "merged": false,
-    "number": 45385,
+    "number": 45422,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Ignore CLIP position_ids in unexpected key loading report",
-    "updated_at": "2026-04-12T15:05:10Z"
+    "title": "Drop `content=None` from messages in `apply_chat_template`",
+    "updated_at": "2026-04-14T01:05:04Z"
   },
   {
-    "additions": 25,
-    "author": "GitGlimpse895",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? `StoppingCriteriaList.__call__` previously evaluated every registered criterion unconditionally on every generation step, even after `is_done` was already `True` for all sequences in the batch. This adds a single `i\u2026",
-    "changed_files": 2,
+    "additions": 117,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "Split off from the RF-DETR PR (#36895). These are changes to `core_model_loading.py` and `test_modeling_common.py` that are needed to load and save RF-DETR checkpoints correctly and in the same format as the original roboflow checkpoints,\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45384",
-    "created_at": "2026-04-12T10:14:58Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45421",
+    "created_at": "2026-04-13T23:51:08Z",
+    "deletions": 198,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45384/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45384",
+    "files_url": "https://github.com/huggingface/transformers/pull/45421/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45421",
     "labels": [],
     "merged": false,
-    "number": 45384,
+    "number": 45421,
     "review_comments_count": 0,
     "state": "open",
-    "title": "generation/stopping_criteria: short-circuit StoppingCriteriaList when all sequences are done",
-    "updated_at": "2026-04-12T10:32:38Z"
+    "title": "Improve nested `base_model_prefix` handling in weight conversion and loading",
+    "updated_at": "2026-04-14T00:08:59Z"
   },
   {
-    "additions": 6,
-    "author": "Aftabbs",
-    "author_association": "NONE",
-    "body_excerpt": "## Description Fixes #45290. `apply_chat_template(tokenize=True)` raises `KeyError: 'content'` when a conversation contains an assistant message that has `tool_calls` but no `content` key: ```python processor.apply_chat_template( [[ {\"role\u2026",
-    "changed_files": 1,
+    "additions": 40,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, we do not want to have proper `nn.Module`s to be registered for kernels exchanged functions - they are not proper modules (and they are never called as such)! They act as exchange format for kernels but functionally they shou\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45383",
-    "created_at": "2026-04-12T08:48:26Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45383/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45383",
-    "labels": [
-      "Code agent slop"
-    ],
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45420",
+    "created_at": "2026-04-13T23:36:14Z",
+    "deletions": 20,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45420/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45420",
+    "labels": [],
     "merged": false,
-    "number": 45383,
+    "number": 45420,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(processing): guard message content access in apply_chat_template",
-    "updated_at": "2026-04-13T09:34:50Z"
+    "state": "open",
+    "title": "[`Kernels`] Fix kernel function registration",
+    "updated_at": "2026-04-13T23:49:46Z"
   },
   {
-    "additions": 334,
-    "author": "zFlux",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Adds conversion from `facebook/audiogen-medium` (AudioCraft Hub layout: `state_dict.bin` + `compression_state_dict.bin`) to `MusicgenForConditionalGeneration`. - `convert_audiogen_transformers.py` \u2014 reuses `rename_state_dict` fr\u2026",
-    "changed_files": 3,
+    "additions": 42,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "- `get_processor_inputs_from_messages` builds a new dict per message with only `role` and `content`, dropping `tool_calls` and `tool_call_id`. This means `apply_chat_template` can't properly render multi-turn tool-use conversations. - Forw\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45382",
-    "created_at": "2026-04-12T03:38:01Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45418",
+    "created_at": "2026-04-13T19:44:03Z",
     "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45382/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45382",
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45418/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45418",
     "labels": [],
     "merged": false,
-    "number": 45382,
-    "review_comments_count": 0,
+    "number": 45418,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "Add AudioGen (AudioCraft) to MusicGen conversion scripts",
-    "updated_at": "2026-04-12T03:39:00Z"
+    "title": "[serve] Forward `tool_calls`/`tool_call_id` in processor inputs",
+    "updated_at": "2026-04-14T09:26:05Z"
   },
   {
-    "additions": 11,
-    "author": "Brianzhengca",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45380",
-    "created_at": "2026-04-11T21:41:32Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45380/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45380",
-    "labels": [],
-    "merged": false,
-    "number": 45380,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix Qwen3_5MoeVisionConfig deepstack_visual_indexes silently dropped by @strict (Issue: https://github.com/huggingface/transformers/issues/45375)",
-    "updated_at": "2026-04-11T22:02:50Z"
-  },
-  {
-    "additions": 9,
-    "author": "hijingsong",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Problem The `@strict` decorator on `Qwen3_5MoeVisionConfig` silently drops the `deepstack_visual_indexes` field during config loading because it is not declared as a class attribute. Every Qwen3.5 MoE model on HuggingFace ships with thi\u2026",
-    "changed_files": 2,
+    "additions": 746,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds type checking to `src/transformers/*.py`",
+    "changed_files": 42,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45379",
-    "created_at": "2026-04-11T19:40:19Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45415",
+    "created_at": "2026-04-13T17:09:40Z",
+    "deletions": 517,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45379/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45379",
+    "files_url": "https://github.com/huggingface/transformers/pull/45415/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45415",
     "labels": [],
     "merged": false,
-    "number": 45379,
+    "number": 45415,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix(config): add deepstack_visual_indexes to Qwen3_5MoeVisionConfig",
-    "updated_at": "2026-04-11T19:41:25Z"
+    "title": "Adds type checking to `src/transformers/*py`",
+    "updated_at": "2026-04-13T18:09:59Z"
   },
   {
-    "additions": 5,
-    "author": "hijingsong",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Fix: Guard ReasoningEffort import for older mistral_common versions Fixes #45372 ### Problem `ReasoningEffort` was added in `mistral-common>=1.10.0`, but the import in `tokenization_mistral_common.py` was unconditional within the `is_mi\u2026",
-    "changed_files": 1,
+    "additions": 12,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary Fixes #45137. Re-opened from #45395 on a same-repo branch so CI can run. Since #41147, attention layers are decorated with `@use_kernelized_func(apply_rotary_pos_emb)` which attaches a `rotary_fn` child `nn.Module` at init when\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45378",
-    "created_at": "2026-04-11T18:26:14Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45414",
+    "created_at": "2026-04-13T16:19:15Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45378/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45378",
-    "labels": [],
-    "merged": false,
-    "number": 45378,
+    "files_url": "https://github.com/huggingface/transformers/pull/45414/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45414",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 45414,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix(mistral): guard ReasoningEffort import for older mistral_common versions",
-    "updated_at": "2026-04-11T18:26:14Z"
+    "state": "closed",
+    "title": "Fix IndexError with DeepSpeed ZeRO-3 when kernels rotary is active",
+    "updated_at": "2026-04-13T16:38:53Z"
   },
   {
-    "additions": 53,
-    "author": "Dippp10",
+    "additions": 12,
+    "author": "ezylopx5",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "body_excerpt": "I ran into an edge case in eta sampling where `EtaLogitsWarper` crashes if a row is fully masked (`scores == -inf` for all tokens). The previous entropy computation used `Categorical(logits=scores).entropy()`, which fails on that input. Th\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45377",
-    "created_at": "2026-04-11T14:17:32Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45413",
+    "created_at": "2026-04-13T15:54:45Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45377/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45377",
+    "files_url": "https://github.com/huggingface/transformers/pull/45413/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45413",
     "labels": [],
     "merged": false,
-    "number": 45377,
+    "number": 45413,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Enhance README documentation",
-    "updated_at": "2026-04-11T14:17:32Z"
+    "title": "Fix EtaLogitsWarper on fully masked logits",
+    "updated_at": "2026-04-13T15:54:45Z"
   },
   {
-    "additions": 625,
-    "author": "mohamad-tohidi",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? i added an example for hierarchical text classification ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [contributor\u2026",
+    "additions": 12,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "This PR fixes the response schema used in the Gemma4 conversion script. The change has already been made in the Hub repos themselves, so I'm just copying the updated schema into the codebase!",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45374",
-    "created_at": "2026-04-11T10:00:07Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45411",
+    "created_at": "2026-04-13T15:31:33Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45374/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45374",
+    "files_url": "https://github.com/huggingface/transformers/pull/45411/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45411",
     "labels": [],
-    "merged": false,
-    "number": 45374,
+    "merged": true,
+    "number": 45411,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Adding hierarchical classification example",
-    "updated_at": "2026-04-11T10:00:07Z"
+    "state": "closed",
+    "title": "Fix the response schema for the gemma4 converter",
+    "updated_at": "2026-04-14T10:21:48Z"
   },
   {
-    "additions": 3,
-    "author": "HelloAnner",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #45341 When CUDA is installed but no GPU is available, `get_device_properties()` calls `torch.cuda.get_device_capability()` which fails because there is no CUDA device. The fix moves `import torch` to the top of the function and adds\u2026",
+    "additions": 2,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45371",
-    "created_at": "2026-04-11T06:22:52Z",
-    "deletions": 3,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45371/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45371",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45410",
+    "created_at": "2026-04-13T14:36:01Z",
+    "deletions": 2,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45410/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45410",
     "labels": [],
     "merged": false,
-    "number": 45371,
+    "number": 45410,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: check CUDA availability before calling get_device_capability",
-    "updated_at": "2026-04-13T06:40:16Z"
+    "title": "fix(altclip): fix failing tests",
+    "updated_at": "2026-04-14T05:39:18Z"
   },
   {
-    "additions": 12,
-    "author": "RudrenduPaul",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes five documentation errors in the `Gemma3nTextConfig` docstring in `modular_gemma3n.py` (and the generated `configuration_gemma3n.py`): 1. **Typo**: `\"emebeddings\"` \u2192 `\"embeddings\"` in `hidden_size_per_layer_in\u2026",
-    "changed_files": 2,
+    "additions": 606,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - Full `distributed_config` integration in `from_pretrained()` \u2014 mesh creation, apply TP + FSDP, attach `model.device_mesh` - `gather_full_state_dict()` for streaming DTensor\u2192full tensor saving (rank 0 only) - `convert_strided_t\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45370",
-    "created_at": "2026-04-11T06:15:05Z",
-    "deletions": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45409",
+    "created_at": "2026-04-13T14:27:38Z",
+    "deletions": 223,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45370/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45370",
+    "files_url": "https://github.com/huggingface/transformers/pull/45409/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45409",
     "labels": [],
     "merged": false,
-    "number": 45370,
-    "review_comments_count": 0,
+    "number": 45409,
+    "review_comments_count": 4,
     "state": "open",
-    "title": "docs: fix 5 docstring errors in Gemma3nTextConfig (typos, grammar, formatting)",
-    "updated_at": "2026-04-11T06:16:13Z"
+    "title": "from_pretrained orchestration + distributed save/load",
+    "updated_at": "2026-04-14T15:10:57Z"
   },
   {
-    "additions": 10,
-    "author": "sharziki",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #45245 \u2014 `torch.multinomial` crashes with `RuntimeError: number of categories cannot exceed 2^24` when `num_beams * vocab_size > 16,777,216` during beam search with `do_sample=True`. **Root cause:** In `_get_top_k_continua\u2026",
-    "changed_files": 1,
+    "additions": 779,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - Extends the TPStyle API (from #45028) with MoE expert parallelism and sequence parallelism support - Adds `PackedColwiseParallel`, `MoEExpertsParallel`, `PrepareModuleInputOutput`, `_AllReduceBackward` custom ParallelStyle sub\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45369",
-    "created_at": "2026-04-11T02:42:07Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45408",
+    "created_at": "2026-04-13T14:25:08Z",
+    "deletions": 256,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45369/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45369",
+    "files_url": "https://github.com/huggingface/transformers/pull/45408/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45408",
     "labels": [],
     "merged": false,
-    "number": 45369,
+    "number": 45408,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix(generation): handle CUDA multinomial limit in beam search sampling",
-    "updated_at": "2026-04-11T02:55:44Z"
+    "title": "MoE expert parallelism + sequence parallelism",
+    "updated_at": "2026-04-14T14:40:08Z"
   },
   {
     "additions": 6,
-    "author": "sharziki",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #45362 \u2014 `transformers chat` crashes with `AttributeError: 'Qwen3VLProcessor' object has no attribute '_tokenizer'` when streaming responses from Qwen models. **Root cause:** `GenerateManager.generate_streaming()` and `CBG\u2026",
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "trainer: @SunMarc , pls help review, thx!",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45368",
-    "created_at": "2026-04-11T02:34:32Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45407",
+    "created_at": "2026-04-13T14:18:37Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45368/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45368",
+    "files_url": "https://github.com/huggingface/transformers/pull/45407/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45407",
     "labels": [],
     "merged": false,
-    "number": 45368,
+    "number": 45407,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix(serving): resolve rust tokenizer from ProcessorMixin in streaming generation",
-    "updated_at": "2026-04-11T02:34:32Z"
+    "title": "avoid wrap 4bit-quantized model into DP",
+    "updated_at": "2026-04-14T13:02:24Z"
   },
   {
-    "additions": 15,
-    "author": "jackcook",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Update to the latest Four Over Six API by adding options to specify the data type of activations, weights, and gradients individually cc @SunMarc",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, it is likely this model will be deprecated but I want to have a quick fix here by moving the instruct tokenizer to the internal testing repo",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45367",
-    "created_at": "2026-04-11T01:47:51Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45404",
+    "created_at": "2026-04-13T12:41:36Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45367/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45367",
+    "files_url": "https://github.com/huggingface/transformers/pull/45404/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45404",
     "labels": [],
-    "merged": false,
-    "number": 45367,
+    "merged": true,
+    "number": 45404,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add dtype config options for Four Over Six",
-    "updated_at": "2026-04-11T02:00:27Z"
+    "state": "closed",
+    "title": "[`Tokenizers`] Move gpt sw3 tokenizer out",
+    "updated_at": "2026-04-13T12:51:38Z"
   },
   {
-    "additions": 64,
-    "author": "owwll",
-    "author_association": "NONE",
-    "body_excerpt": "This PR addresses two separate issues: 1. **Fixes a bug in `Mistral4` RoPE dimension calculation.** The `Mistral4RotaryEmbedding` was incorrectly using the full `head_dim` to calculate the rotary dimension, instead of respecting the `parti\u2026",
-    "changed_files": 5,
+    "additions": 14,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45366",
-    "created_at": "2026-04-10T21:13:32Z",
-    "deletions": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45403",
+    "created_at": "2026-04-13T12:25:48Z",
+    "deletions": 28,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45366/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45366",
+    "files_url": "https://github.com/huggingface/transformers/pull/45403/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45403",
     "labels": [],
     "merged": false,
-    "number": 45366,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix OLMoE routing and Mistral4 RoPE dimensions",
-    "updated_at": "2026-04-10T21:40:40Z"
+    "number": 45403,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "fix(clipseg): fix 2 failing tests",
+    "updated_at": "2026-04-14T14:14:53Z"
   },
   {
-    "additions": 46,
-    "author": "burtenshaw",
-    "author_association": "MEMBER",
-    "body_excerpt": "Applied the overlapping GPT-J refactor from the staged PRs: added `_can_record_outputs`, moved `GPTJModel.forward` to decorator-based output capture, switched wrapper forwards to `@can_return_tuple`, and removed manual hidden-state/attenti\u2026",
+    "additions": 10,
+    "author": "saslifat-gif",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #45397 ## What does this PR do? Fixes #45397 **Root cause:** `_load_state_dict_into_zero3_model` in `src/transformers/integrations/deepspeed.py` only iterates over `named_parameters` \u2014 never `named_buffers`. Buffers registered via `r\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45365",
-    "created_at": "2026-04-10T19:37:53Z",
-    "deletions": 101,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45402",
+    "created_at": "2026-04-13T12:21:02Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45365/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45365",
+    "files_url": "https://github.com/huggingface/transformers/pull/45402/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45402",
     "labels": [],
     "merged": false,
-    "number": 45365,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor GPT-J output tracing to use standardized decorators",
-    "updated_at": "2026-04-10T19:53:25Z"
+    "number": 45402,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Fix ZeRO-3 from_pretrained: load registered buffers in _load_state_dict_into_zero3_model",
+    "updated_at": "2026-04-14T13:53:37Z"
   },
   {
-    "additions": 815,
-    "author": "caiovicentino",
-    "author_association": "NONE",
-    "body_excerpt": "### Summary Adds a third backend to `QuantizedCache`: `polarquant`. Joins the existing `quanto` and `hqq` options and implements a Walsh-Hadamard rotation plus Lloyd-Max scalar quantization scheme tuned for KV cache compression. Pure PyTor\u2026",
-    "changed_files": 5,
+    "additions": 3157,
+    "author": "sachinkumarsingh092",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45364",
-    "created_at": "2026-04-10T19:02:16Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45401",
+    "created_at": "2026-04-13T11:26:48Z",
+    "deletions": 1,
     "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45364/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45364",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45401/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45401",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
     "merged": false,
-    "number": 45364,
+    "number": 45401,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add PolarQuant backend to QuantizedCache (Hadamard-rotated Lloyd-Max)",
-    "updated_at": "2026-04-12T09:27:10Z"
+    "state": "open",
+    "title": "Add support for Voxtral-4B-TTS-2603 to transformers",
+    "updated_at": "2026-04-13T13:38:15Z"
   },
   {
-    "additions": 280,
-    "author": "michaelbenayoun",
+    "additions": 407,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title.",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45381 but it is weird, I remember checking position ids by value as well in qwen2.5 to verify that time-interval works \ud83e\udd14 update: i know why, the integration t\u2026",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45363",
-    "created_at": "2026-04-10T18:53:22Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45363/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45363",
-    "labels": [],
-    "merged": false,
-    "number": 45363,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Fused kernels support",
-    "updated_at": "2026-04-10T20:20:49Z"
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45400",
+    "created_at": "2026-04-13T11:01:34Z",
+    "deletions": 375,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45400/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45400",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 45400,
+    "review_comments_count": 14,
+    "state": "closed",
+    "title": "Fix Qwen2.5VL temporal grid positions",
+    "updated_at": "2026-04-14T13:03:42Z"
   },
   {
-    "additions": 98,
+    "additions": 71,
     "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/trl/issues/5497, lemme just check the reverse conversion TL;DR; the base model prefix is never appended if it is part of a bigger VLM, which was true for LLaVa. Loading CLIP chec\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? Credits to @xenova , adds an example code since it is more complicated that text LLMs. Beginner users might not know and an example is pretty much useful for them",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45361",
-    "created_at": "2026-04-10T13:49:28Z",
-    "deletions": 13,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45398",
+    "created_at": "2026-04-13T09:28:34Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45361/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45361",
+    "files_url": "https://github.com/huggingface/transformers/pull/45398/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45398",
     "labels": [],
     "merged": false,
-    "number": 45361,
+    "number": 45398,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add CLIP-like models in conversion to VLMs",
-    "updated_at": "2026-04-13T08:59:53Z"
+    "title": "Add example for iterative chatting with MLLMs",
+    "updated_at": "2026-04-13T09:38:27Z"
   },
   {
-    "additions": 3,
-    "author": "hanouticelina",
+    "additions": 2563,
+    "author": "IlyasMoutawwakil",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? `huggingface-cli` is deprecated and no longer maintained. This PR updates the remaining references with `hf`",
-    "changed_files": 3,
+    "body_excerpt": "needed both claude and copilot's help on this one \ud83d\ude05 The idea is to make the vlms and their visual/audio encders compileable / exportable. here's a demo of the full model forward being compileable with these precomputed tensors. ```python \"\u2026",
+    "changed_files": 45,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45360",
-    "created_at": "2026-04-10T11:59:37Z",
-    "deletions": 3,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45396",
+    "created_at": "2026-04-13T08:46:10Z",
+    "deletions": 2053,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45360/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45360",
+    "files_url": "https://github.com/huggingface/transformers/pull/45396/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45396",
     "labels": [],
     "merged": false,
-    "number": 45360,
-    "review_comments_count": 0,
+    "number": 45396,
+    "review_comments_count": 23,
     "state": "open",
-    "title": "Replace deprecated `huggingface-cli` references with `hf`",
-    "updated_at": "2026-04-10T12:13:18Z"
+    "title": "Extract dynamic vision/audio tensors into standalone pure functions",
+    "updated_at": "2026-04-14T14:10:12Z"
   },
   {
-    "additions": 3,
+    "additions": 12,
     "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "Fixes #45356 ## Summary - Remove `kimi_k25` from `MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS`: its remote `TikTokenTokenizer` is the only correct backend \u2014 the model has no `tokenizer.json`, and its `added_tokens_decoder` has non-sequential\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Summary Fixes #45137. Since #41147, attention layers are decorated with `@use_kernelized_func(apply_rotary_pos_emb)` which attaches a `rotary_fn` child `nn.Module` at init when the `kernels` library is available. DeepSpeed ZeRO-3's para\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45359",
-    "created_at": "2026-04-10T10:42:32Z",
-    "deletions": 4,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45395",
+    "created_at": "2026-04-13T08:35:06Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45359/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45359",
-    "labels": [
-      "for patch"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45395/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45395",
+    "labels": [],
     "merged": false,
-    "number": 45359,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix Kimi-K2.5 tokenizer regression and _patch_mistral_regex AttributeError",
-    "updated_at": "2026-04-10T15:32:26Z"
-  },
-  {
-    "additions": 111,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/45357 finally. This was not catched in the previous fix, as the model can be reloaded correctly by `from_pretrained`, but keys are still wrongly serialized! Aft\u2026",
-    "changed_files": 24,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45358",
-    "created_at": "2026-04-10T10:19:42Z",
-    "deletions": 42,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45358/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45358",
-    "labels": [
-      "for patch"
-    ],
-    "merged": true,
-    "number": 45358,
-    "review_comments_count": 2,
+    "number": 45395,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Fix vlm weight mappings",
-    "updated_at": "2026-04-10T15:41:47Z"
+    "title": "Fix IndexError with DeepSpeed ZeRO-3 when kernels rotary is active",
+    "updated_at": "2026-04-13T16:18:40Z"
   },
   {
-    "additions": 2297,
-    "author": "Shikhar-S",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR introduces [PhoneticXeus](https://arxiv.org/abs/2603.29042), which is the state-of-the-art universal phone recognizer trained on 70+ languages and evaluated on ~100 languages. The model should have high util\u2026",
-    "changed_files": 16,
+    "additions": 47,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45355",
-    "created_at": "2026-04-10T04:26:05Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45394",
+    "created_at": "2026-04-13T08:28:21Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45355/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45355",
-    "labels": [
-      "Audio"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45394/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45394",
+    "labels": [],
     "merged": false,
-    "number": 45355,
+    "number": 45394,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add universal phone recognition model - PhoneticXeus",
-    "updated_at": "2026-04-13T07:50:03Z"
+    "title": "fix(x_clip): fix 8 failed test cases",
+    "updated_at": "2026-04-14T07:44:33Z"
   },
   {
-    "additions": 8,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? Gemma 4 was calculating the CE loss incorrectly and not handling gradient accumulation steps properly, leading to losses scaled up by the value of the gradient accumulation steps rather than letting the built in HF\u2026",
-    "changed_files": 4,
+    "additions": 0,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Remove accidental addition of an ignored file",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45354",
-    "created_at": "2026-04-10T02:54:56Z",
-    "deletions": 76,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45392",
+    "created_at": "2026-04-13T07:14:44Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45354/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45354",
+    "files_url": "https://github.com/huggingface/transformers/pull/45392/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45392",
     "labels": [],
     "merged": true,
-    "number": 45354,
+    "number": 45392,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix gemma4 gradient accumulation loss and last token incorrect labels",
-    "updated_at": "2026-04-10T10:08:12Z"
+    "title": "remove cache file from tree",
+    "updated_at": "2026-04-13T08:24:27Z"
   },
   {
-    "additions": 30,
-    "author": "wilnn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Add `**kwargs` to all methods in the `CallbackHandler` class. Previously, only the `CallbackHandler.on_push_begin()` method accepted `**kwargs`, while all other methods did not. This forces users who want to customi\u2026",
-    "changed_files": 1,
+    "additions": 461,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Similarly to the VLM tester, this patch introduces a audio tester class, used in - Qwen2Audio - AudioFlamingo3 - GraniteSpeech Adding a new audio-language model using this will require ~8-20 lines for the tester (vs\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45353",
-    "created_at": "2026-04-09T23:14:20Z",
-    "deletions": 30,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45391",
+    "created_at": "2026-04-13T06:32:49Z",
+    "deletions": 471,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45353/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45353",
+    "files_url": "https://github.com/huggingface/transformers/pull/45391/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45391",
     "labels": [],
-    "merged": true,
-    "number": 45353,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "add kwargs to all methods in the CallbackHandler class",
-    "updated_at": "2026-04-10T14:16:46Z"
+    "merged": false,
+    "number": 45391,
+    "review_comments_count": 9,
+    "state": "open",
+    "title": "audio tester class",
+    "updated_at": "2026-04-13T12:38:16Z"
   },
   {
-    "additions": 4,
-    "author": "RudrenduPaul",
+    "additions": 32,
+    "author": "ruben-aghayan",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Corrects an incorrect return type annotation on `Qwen3MoeSparseMoeBlock.forward`. The method is annotated as returning `tuple[torch.Tensor, torch.Tensor]` but actually returns a single reshaped `torch.Tensor` (see\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? This PR fails loudly when using repetition penalty on input_embed without input_ids args. Previously, users were able to call repetition penalty on generate calls with input_embeds args. Since they don't actually ha\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45352",
-    "created_at": "2026-04-09T21:53:31Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45389",
+    "created_at": "2026-04-13T03:57:04Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45352/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45352",
+    "files_url": "https://github.com/huggingface/transformers/pull/45389/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45389",
     "labels": [],
     "merged": false,
-    "number": 45352,
+    "number": 45389,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix(qwen3_moe): correct return type annotation on Qwen3MoeSparseMoeBlock.forward",
-    "updated_at": "2026-04-11T06:44:32Z"
+    "title": "Require input_ids for repetition penalty",
+    "updated_at": "2026-04-14T10:22:01Z"
   },
   {
-    "additions": 2,
-    "author": "RudrenduPaul",
+    "additions": 45,
+    "author": "albertorkive",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a crash in `get_device_properties()` in `testing_utils.py` when CUDA is installed on the system but no GPU device is present (e.g., a CPU-only cloud studio with CUDA libraries installed). The function called\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Makes `Gemma4ClippableLinear` inherit from `nn.Linear` instead of wrapping one via composition, enabling PEFT/LoRA to discover and target vision/audio encoder layers. **Problem:** PEFT's LoRA module discovery uses `\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45351",
-    "created_at": "2026-04-09T21:51:42Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45388",
+    "created_at": "2026-04-12T17:02:39Z",
+    "deletions": 31,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45351/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45351",
+    "files_url": "https://github.com/huggingface/transformers/pull/45388/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45388",
     "labels": [],
     "merged": false,
-    "number": 45351,
-    "review_comments_count": 0,
+    "number": 45388,
+    "review_comments_count": 2,
     "state": "open",
-    "title": "fix(testing_utils): guard get_device_capability with torch.cuda.is_available()",
-    "updated_at": "2026-04-13T06:38:50Z"
+    "title": "Make Gemma4ClippableLinear inherit from nn.Linear for PEFT/LoRA compatibility",
+    "updated_at": "2026-04-14T13:01:19Z"
   },
   {
-    "additions": 1090,
-    "author": "mrutkows",
+    "additions": 35,
+    "author": "albertorkive",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Support new Granite 4 vision arch. <!-- Remove if not applicable --> Fixes # (issue) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by c\u2026",
-    "changed_files": 8,
+    "body_excerpt": "# What does this PR do? Fixes `attn_implementation=\"flash_attention_3\"` which is currently broken for the most common FA3 install method \u2014 the hopper wheel built from `flash-attention/hopper/`. **Three issues fixed:** 1. **`is_flash_attn_3\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45350",
-    "created_at": "2026-04-09T17:46:37Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45350/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45350",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45387",
+    "created_at": "2026-04-12T17:02:14Z",
+    "deletions": 15,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45387/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45387",
     "labels": [],
     "merged": false,
-    "number": 45350,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "WIP: Add support for Granite4VisionForConditionalGeneration",
-    "updated_at": "2026-04-10T12:34:50Z"
+    "number": 45387,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Fix flash_attention_3 detection and import for hopper wheel installs",
+    "updated_at": "2026-04-13T16:14:21Z"
   },
   {
-    "additions": 90,
-    "author": "florian6973",
+    "additions": 26,
+    "author": "UsamaKenway",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #45305 Add a regression test in `TrainerGradientAccumulationTest` to avoid passing the GAS value to Accelerate by mistake Description: I force the value of the `num_steps` parameter to be 1, and the regression\u2026",
+    "body_excerpt": "Optimizes memory usage when loading GGUF models by performing dtype casting immediately after dequantization. While I was adding the support for Gemma4 in this PR #45296, i noticed this issue that the GGUF tensors are dequantized to `float\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45349",
-    "created_at": "2026-04-09T17:24:39Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45386",
+    "created_at": "2026-04-12T13:17:17Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45349/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45349",
+    "files_url": "https://github.com/huggingface/transformers/pull/45386/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45386",
     "labels": [],
     "merged": false,
-    "number": 45349,
-    "review_comments_count": 6,
+    "number": 45386,
+    "review_comments_count": 2,
     "state": "open",
-    "title": "Fix #45305 + add regression test GAS",
-    "updated_at": "2026-04-11T18:42:18Z"
+    "title": "[GGUF] Reduce peak RAM usage by casting dequantized tensors early during load",
+    "updated_at": "2026-04-13T22:51:25Z"
   },
   {
-    "additions": 50,
-    "author": "qgallouedec",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes #45290 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review and r\u2026",
-    "changed_files": 5,
+    "additions": 15,
+    "author": "songyuc",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Loading `openai/clip-vit-base-patch32` currently reports the following keys as unexpected: - `text_model.embeddings.position_ids` - `vision_model.embeddings.position_ids` In the current CLIP implementation, these b\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45348",
-    "created_at": "2026-04-09T15:59:07Z",
-    "deletions": 19,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45385",
+    "created_at": "2026-04-12T12:53:49Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45348/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45348",
+    "files_url": "https://github.com/huggingface/transformers/pull/45385/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45385",
     "labels": [],
     "merged": false,
-    "number": 45348,
-    "review_comments_count": 5,
-    "state": "open",
-    "title": "Fix apply_chat_template crash on tool_call messages without content",
-    "updated_at": "2026-04-11T01:40:44Z"
-  },
-  {
-    "additions": 35,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. `accelerate` destroys the dict otherwise, if it's not BOTH passed as kwarg AND part of `_skip_keys_device_placement`.......... `per_layer_input` needs to stay as a positional arg, for gradient chec\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45347",
-    "created_at": "2026-04-09T15:31:34Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45347/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45347",
-    "labels": [],
-    "merged": true,
-    "number": 45347,
+    "number": 45385,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[gemma4] Fix device map auto",
-    "updated_at": "2026-04-09T15:45:15Z"
+    "state": "open",
+    "title": "Ignore CLIP position_ids in unexpected key loading report",
+    "updated_at": "2026-04-12T15:05:10Z"
   },
   {
-    "additions": 46,
-    "author": "ionut-anghelina",
+    "additions": 25,
+    "author": "GitGlimpse895",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": null,
-    "changed_files": 15,
+    "body_excerpt": "# What does this PR do? `StoppingCriteriaList.__call__` previously evaluated every registered criterion unconditionally on every generation step, even after `is_done` was already `True` for all sequences in the batch. This adds a single `i\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45346",
-    "created_at": "2026-04-09T14:48:28Z",
-    "deletions": 30,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45384",
+    "created_at": "2026-04-12T10:14:58Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45346/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45346",
+    "files_url": "https://github.com/huggingface/transformers/pull/45384/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45384",
     "labels": [],
     "merged": false,
-    "number": 45346,
-    "review_comments_count": 1,
+    "number": 45384,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Fix Double Application of Softmax for Router Logits in MoE models",
-    "updated_at": "2026-04-09T15:50:47Z"
+    "title": "generation/stopping_criteria: short-circuit StoppingCriteriaList when all sequences are done",
+    "updated_at": "2026-04-14T03:23:23Z"
   },
   {
-    "additions": 30,
-    "author": "ansley",
+    "additions": 6,
+    "author": "Aftabbs",
     "author_association": "NONE",
-    "body_excerpt": "The `transformers` V5 \"rm slow tokenizers\" refactor (\\#40936) aliased `LlamaTokenizerFast` to `LlamaTokenizer`, whose `__init__` unconditionally installs a SentencePiece Metaspace pre-tokenizer. This is correct for classic Llama/Llama-2 mo\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Description Fixes #45290. `apply_chat_template(tokenize=True)` raises `KeyError: 'content'` when a conversation contains an assistant message that has `tool_calls` but no `content` key: ```python processor.apply_chat_template( [[ {\"role\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45345",
-    "created_at": "2026-04-09T14:31:40Z",
-    "deletions": 14,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45383",
+    "created_at": "2026-04-12T08:48:26Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45345/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45345",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45383/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45383",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 45345,
+    "number": 45383,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix ByteLevel-BPE tokenizers silently breaking in `LlamaTokenizer`",
-    "updated_at": "2026-04-10T12:45:24Z"
+    "title": "fix(processing): guard message content access in apply_chat_template",
+    "updated_at": "2026-04-13T09:34:50Z"
   },
   {
-    "additions": 6,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Simple hook to display test duration. This will append inline duration per test during the run, example: ``` tests/utils/test_configuration_utils.py::ConfigPushToHubTester::test_push_to_hub [gw1] [ 90%] PASSED tests\u2026",
-    "changed_files": 1,
+    "additions": 334,
+    "author": "zFlux",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Adds conversion from `facebook/audiogen-medium` (AudioCraft Hub layout: `state_dict.bin` + `compression_state_dict.bin`) to `MusicgenForConditionalGeneration`. - `convert_audiogen_transformers.py` \u2014 reuses `rename_state_dict` fr\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45344",
-    "created_at": "2026-04-09T14:22:46Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45382",
+    "created_at": "2026-04-12T03:38:01Z",
     "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45344/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45344",
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45382/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45382",
     "labels": [],
-    "merged": true,
-    "number": 45344,
+    "merged": false,
+    "number": 45382,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor: display test duration",
-    "updated_at": "2026-04-09T15:19:26Z"
+    "state": "open",
+    "title": "Add AudioGen (AudioCraft) to MusicGen conversion scripts",
+    "updated_at": "2026-04-12T03:39:00Z"
   },
   {
-    "additions": 8,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": null,
-    "changed_files": 1,
+    "additions": 11,
+    "author": "Brianzhengca",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45342",
-    "created_at": "2026-04-09T14:13:15Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45380",
+    "created_at": "2026-04-11T21:41:32Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45342/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45342",
+    "files_url": "https://github.com/huggingface/transformers/pull/45380/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45380",
     "labels": [],
     "merged": false,
-    "number": 45342,
+    "number": 45380,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Use `_keys_to_ignore_on_load_unexpected/missing` recursively from children",
-    "updated_at": "2026-04-09T14:23:31Z"
+    "state": "closed",
+    "title": "fix Qwen3_5MoeVisionConfig deepstack_visual_indexes silently dropped by @strict (Issue: https://github.com/huggingface/transformers/issues/45375)",
+    "updated_at": "2026-04-11T22:02:50Z"
   },
   {
-    "additions": 17,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/45314 with a better fix. Fixes https://github.com/huggingface/transformers/issues/45216 and https://github.com/huggingface/transformers/issues/45310 and ht\u2026",
+    "additions": 9,
+    "author": "hijingsong",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Problem The `@strict` decorator on `Qwen3_5MoeVisionConfig` silently drops the `deepstack_visual_indexes` field during config loading because it is not declared as a class attribute. Every Qwen3.5 MoE model on HuggingFace ships with thi\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45340",
-    "created_at": "2026-04-09T12:02:14Z",
-    "deletions": 14,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45379",
+    "created_at": "2026-04-11T19:40:19Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45340/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45340",
+    "files_url": "https://github.com/huggingface/transformers/pull/45379/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45379",
     "labels": [],
-    "merged": true,
-    "number": 45340,
+    "merged": false,
+    "number": 45379,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix conversion mappings for vlms",
-    "updated_at": "2026-04-10T10:18:09Z"
+    "state": "open",
+    "title": "fix(config): add deepstack_visual_indexes to Qwen3_5MoeVisionConfig",
+    "updated_at": "2026-04-13T15:34:33Z"
   },
   {
-    "additions": 156,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The CircleCI config file is not ruff formatted, leading to unwanted changes when it's opened in an editor that follows our repository ruff configuration. This patch adds it and runs `make style` to update it",
-    "changed_files": 3,
+    "additions": 5,
+    "author": "hijingsong",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Fix: Guard ReasoningEffort import for older mistral_common versions Fixes #45372 ### Problem `ReasoningEffort` was added in `mistral-common>=1.10.0`, but the import in `tokenization_mistral_common.py` was unconditional within the `is_mi\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45339",
-    "created_at": "2026-04-09T09:44:16Z",
-    "deletions": 58,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45378",
+    "created_at": "2026-04-11T18:26:14Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45339/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45339",
+    "files_url": "https://github.com/huggingface/transformers/pull/45378/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45378",
     "labels": [],
-    "merged": true,
-    "number": 45339,
+    "merged": false,
+    "number": 45378,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "chore: added circleci python script to ruff and ty checkers",
-    "updated_at": "2026-04-09T12:00:08Z"
+    "state": "open",
+    "title": "fix(mistral): guard ReasoningEffort import for older mistral_common versions",
+    "updated_at": "2026-04-13T12:18:54Z"
   },
   {
-    "additions": 37,
-    "author": "RudrenduPaul",
+    "additions": 53,
+    "author": "Dippp10",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Closes #45162 ## What this PR does Expands the docstrings of `_can_set_attn_implementation` and `_can_set_experts_implementation` in `modeling_utils.py` to explicitly document the known limitations of their source-inspection heuristic. **C\u2026",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45338",
-    "created_at": "2026-04-09T09:35:52Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45377",
+    "created_at": "2026-04-11T14:17:32Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45338/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45338",
+    "files_url": "https://github.com/huggingface/transformers/pull/45377/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45377",
     "labels": [],
     "merged": false,
-    "number": 45338,
+    "number": 45377,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "docs: document known limitations of _can_set_attn/experts_implementation source inspection",
-    "updated_at": "2026-04-09T13:43:04Z"
+    "state": "open",
+    "title": "Enhance README documentation",
+    "updated_at": "2026-04-11T14:17:32Z"
   },
   {
-    "additions": 13,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Removing test_hub from CI for now",
-    "changed_files": 3,
+    "additions": 625,
+    "author": "mohamad-tohidi",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? i added an example for hierarchical text classification ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [contributor\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45337",
-    "created_at": "2026-04-09T08:54:45Z",
-    "deletions": 30,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45374",
+    "created_at": "2026-04-11T10:00:07Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45337/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45337",
+    "files_url": "https://github.com/huggingface/transformers/pull/45374/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45374",
     "labels": [],
-    "merged": true,
-    "number": 45337,
+    "merged": false,
+    "number": 45374,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: remove test_hub for now",
-    "updated_at": "2026-04-09T09:28:52Z"
-  },
-  {
-    "additions": 84,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Follow-up of https://github.com/huggingface/transformers/pull/45312. This removes the unnecessary weights, and silently skip them during loading, so that the checkpoints on the hub do not have to b\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45336",
-    "created_at": "2026-04-09T08:43:55Z",
-    "deletions": 26,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45336/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45336",
-    "labels": [],
-    "merged": true,
-    "number": 45336,
-    "review_comments_count": 7,
-    "state": "closed",
-    "title": "[gemma4] Remove all shared weights, and silently skip them during loading",
-    "updated_at": "2026-04-09T13:23:33Z"
+    "title": "Adding hierarchical classification example",
+    "updated_at": "2026-04-13T13:05:08Z"
   },
   {
-    "additions": 1333,
-    "author": "kmswin1",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Add A.X K1 model architecture What does this PR do? This PR adds support for A.X K1, a large-scale Mixture-of-Experts (MoE) language model developed by [SK Telecom](https://huggingface.co/skt). A.X K1 contains 519B total parameters with 33\u2026",
-    "changed_files": 8,
+    "additions": 3,
+    "author": "HelloAnner",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #45341 When CUDA is installed but no GPU is available, `get_device_properties()` calls `torch.cuda.get_device_capability()` which fails because there is no CUDA device. The fix moves `import torch` to the top of the function and adds\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45334",
-    "created_at": "2026-04-09T06:21:43Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45371",
+    "created_at": "2026-04-11T06:22:52Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45334/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45334",
+    "files_url": "https://github.com/huggingface/transformers/pull/45371/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45371",
     "labels": [],
     "merged": false,
-    "number": 45334,
+    "number": 45371,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Feature/add axk1",
-    "updated_at": "2026-04-09T06:21:43Z"
+    "state": "closed",
+    "title": "fix: check CUDA availability before calling get_device_capability",
+    "updated_at": "2026-04-13T06:40:16Z"
   },
   {
-    "additions": 471,
-    "author": "eladsegal",
+    "additions": 12,
+    "author": "RudrenduPaul",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds heterogeneous model support - the ability for individual layers to differ from the global config (e.g., different `intermediate_size`, `num_key_value_heads`) and to skip sub-modules entirely (MLP, attention, et\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? Fixes five documentation errors in the `Gemma3nTextConfig` docstring in `modular_gemma3n.py` (and the generated `configuration_gemma3n.py`): 1. **Typo**: `\"emebeddings\"` \u2192 `\"embeddings\"` in `hidden_size_per_layer_in\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45333",
-    "created_at": "2026-04-09T06:18:11Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45370",
+    "created_at": "2026-04-11T06:15:05Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45333/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45333",
+    "files_url": "https://github.com/huggingface/transformers/pull/45370/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45370",
     "labels": [],
-    "merged": false,
-    "number": 45333,
+    "merged": true,
+    "number": 45370,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add heterogeneous config support (per-layer configuration)",
-    "updated_at": "2026-04-12T08:12:42Z"
+    "state": "closed",
+    "title": "docs: fix 5 docstring errors in Gemma3nTextConfig (typos, grammar, formatting)",
+    "updated_at": "2026-04-13T13:14:39Z"
   },
   {
-    "additions": 2152,
-    "author": "eladsegal",
+    "additions": 10,
+    "author": "sharziki",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds heterogeneous model support - the ability for individual layers to differ from the global config (e.g., different `intermediate_size`, `num_key_value_heads`) and to skip sub-modules entirely (MLP, attention, et\u2026",
-    "changed_files": 14,
+    "body_excerpt": "## Summary Fixes #45245 \u2014 `torch.multinomial` crashes with `RuntimeError: number of categories cannot exceed 2^24` when `num_beams * vocab_size > 16,777,216` during beam search with `do_sample=True`. **Root cause:** In `_get_top_k_continua\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45332",
-    "created_at": "2026-04-09T05:56:31Z",
-    "deletions": 40,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45369",
+    "created_at": "2026-04-11T02:42:07Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45332/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45332",
+    "files_url": "https://github.com/huggingface/transformers/pull/45369/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45369",
     "labels": [],
     "merged": false,
-    "number": 45332,
+    "number": 45369,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add heterogeneous model support (per-layer config and modeling)",
-    "updated_at": "2026-04-12T08:25:10Z"
+    "state": "closed",
+    "title": "fix(generation): handle CUDA multinomial limit in beam search sampling",
+    "updated_at": "2026-04-13T12:49:42Z"
   },
   {
-    "additions": 12,
-    "author": "Kash6",
+    "additions": 6,
+    "author": "sharziki",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "get_rope_index unconditionally applies tokens_per_second temporal scaling to both images and videos. For still images (modality_type == 1), this shifts the temporal position origin to start_position * tokens_per_second instead of start_pos\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Summary Fixes #45362 \u2014 `transformers chat` crashes with `AttributeError: 'Qwen3VLProcessor' object has no attribute '_tokenizer'` when streaming responses from Qwen models. **Root cause:** `GenerateManager.generate_streaming()` and `CBG\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45330",
-    "created_at": "2026-04-08T23:51:52Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45368",
+    "created_at": "2026-04-11T02:34:32Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45330/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45330",
-    "labels": [
-      "for patch"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45368/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45368",
+    "labels": [],
     "merged": true,
-    "number": 45330,
+    "number": 45368,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Qwen2.5-VL temporal RoPE scaling applied to still images",
-    "updated_at": "2026-04-13T09:36:23Z"
+    "title": "fix(serving): resolve rust tokenizer from ProcessorMixin in streaming generation",
+    "updated_at": "2026-04-13T15:01:51Z"
   },
   {
-    "additions": 152,
-    "author": "abidlabs",
-    "author_association": "MEMBER",
-    "body_excerpt": "Updates `TrackioCallback` and `TrainingArguments` for the latest version of Trackio using HF Buckets as the backend, and control over creating a static Space for the Trackio dashboard during or at the end of training. These are now the `Tr\u2026",
+    "additions": 15,
+    "author": "jackcook",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Update to the latest Four Over Six API by adding options to specify the data type of activations, weights, and gradients individually cc @SunMarc",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45329",
-    "created_at": "2026-04-08T22:36:08Z",
-    "deletions": 57,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45367",
+    "created_at": "2026-04-11T01:47:51Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45329/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45329",
+    "files_url": "https://github.com/huggingface/transformers/pull/45367/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45367",
     "labels": [],
     "merged": false,
-    "number": 45329,
-    "review_comments_count": 21,
+    "number": 45367,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Update `trackio` integration to use Buckets and \"freeze\" Space after training",
-    "updated_at": "2026-04-10T18:20:51Z"
+    "title": "Add dtype config options for Four Over Six",
+    "updated_at": "2026-04-13T13:29:07Z"
   },
   {
-    "additions": 9,
-    "author": "RyanMullins",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #45242 * Drops `k_proj`, `k_norm`, and `v_proj` weights for `Gemma4TextAttention` modules from the checkpoint if the layer shares KV cache values. These changes can also be adapted to Gemma 3n if that's desira\u2026",
-    "changed_files": 1,
+    "additions": 64,
+    "author": "owwll",
+    "author_association": "NONE",
+    "body_excerpt": "This PR addresses two separate issues: 1. **Fixes a bug in `Mistral4` RoPE dimension calculation.** The `Mistral4RotaryEmbedding` was incorrectly using the full `head_dim` to calculate the rotary dimension, instead of respecting the `parti\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45328",
-    "created_at": "2026-04-08T20:43:42Z",
-    "deletions": 6,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45366",
+    "created_at": "2026-04-10T21:13:32Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45328/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45328",
+    "files_url": "https://github.com/huggingface/transformers/pull/45366/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45366",
     "labels": [],
     "merged": false,
-    "number": 45328,
+    "number": 45366,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Drop unused Gemma4TextAttention weights when sharing KV Cache",
-    "updated_at": "2026-04-09T18:31:13Z"
+    "state": "closed",
+    "title": "Fix OLMoE routing and Mistral4 RoPE dimensions",
+    "updated_at": "2026-04-10T21:40:40Z"
   },
   {
-    "additions": 283,
-    "author": "stevhliu",
+    "additions": 46,
+    "author": "burtenshaw",
     "author_association": "MEMBER",
-    "body_excerpt": "refactors the how to add a model with modular transformers doc: - structure: - flipped the order so you learn how to write the modular file first before generating it - remove the motivator examples with BERT/RoBERTa - merge the two `super\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Applied the overlapping GPT-J refactor from the staged PRs: added `_can_record_outputs`, moved `GPTJModel.forward` to decorator-based output capture, switched wrapper forwards to `@can_return_tuple`, and removed manual hidden-state/attenti\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45327",
-    "created_at": "2026-04-08T20:23:28Z",
-    "deletions": 403,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45327/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45327",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45365",
+    "created_at": "2026-04-10T19:37:53Z",
+    "deletions": 101,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45365/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45365",
     "labels": [],
     "merged": false,
-    "number": 45327,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "[docs] modular transformers",
-    "updated_at": "2026-04-09T12:01:48Z"
+    "number": 45365,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor GPT-J output tracing to use standardized decorators",
+    "updated_at": "2026-04-10T19:53:25Z"
   },
   {
-    "additions": 13,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? \u2192 This PR introduces compat fixes across several audio models to ensure they can be loaded and used by a companion vLLM PR. <ins>These changes are deliberate and are blocking</ins> [this vLLM PR](https://github.co\u2026",
-    "changed_files": 10,
+    "additions": 815,
+    "author": "caiovicentino",
+    "author_association": "NONE",
+    "body_excerpt": "### Summary Adds a third backend to `QuantizedCache`: `polarquant`. Joins the existing `quanto` and `hqq` options and implements a Walsh-Hadamard rotation plus Lloyd-Max scalar quantization scheme tuned for KV cache compression. Pure PyTor\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45326",
-    "created_at": "2026-04-08T18:28:35Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45326/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45326",
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45364",
+    "created_at": "2026-04-10T19:02:16Z",
+    "deletions": 2,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45364/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45364",
     "labels": [],
     "merged": false,
-    "number": 45326,
+    "number": 45364,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "feat[vLLM \u00d7 v5]: Add vLLM compatibility for audio models",
-    "updated_at": "2026-04-09T13:35:32Z"
+    "state": "closed",
+    "title": "Add PolarQuant backend to QuantizedCache (Hadamard-rotated Lloyd-Max)",
+    "updated_at": "2026-04-13T13:31:36Z"
   },
   {
-    "additions": 234,
-    "author": "zucchini-nlp",
+    "additions": 301,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45276 and https://github.com/huggingface/transformers/issues/45335 In gemma4 per-layer inputs have to be resized as long as they aren't part of soft multimoda\u2026",
-    "changed_files": 15,
+    "body_excerpt": "# What does this PR do? This PR adds support for fusing multiple modules into a single kernel \u2014 the motivating case being fused RMSNorm+MLP kernels, but the API is generic. ## What changed - `FusedModuleBase`, `fuse_modules`, `unfuse_modul\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45324",
-    "created_at": "2026-04-08T17:06:26Z",
-    "deletions": 53,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45324/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45324",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45363",
+    "created_at": "2026-04-10T18:53:22Z",
+    "deletions": 5,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45363/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45363",
     "labels": [],
     "merged": false,
-    "number": 45324,
-    "review_comments_count": 5,
+    "number": 45363,
+    "review_comments_count": 3,
     "state": "open",
-    "title": "Gemma4 resizing per layer inputs",
-    "updated_at": "2026-04-13T09:14:48Z"
+    "title": "n-to-1 kernel fusion via `KernelConfig`",
+    "updated_at": "2026-04-14T13:14:57Z"
   },
   {
-    "additions": 124,
-    "author": "remi-or",
+    "additions": 98,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# Summary This PR fixes the issue raised in https://github.com/huggingface/transformers/pull/45274 . CUDA graph reuse in continuous batching used (num_q_tokens, max_kv_read) as the graph cache key. However, FlashAttention varlen kernels al\u2026",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/trl/issues/5497, also fixes https://github.com/huggingface/transformers/issues/45390 TL;DR; the base model prefix is never appended if it is part of a bigger VLM, which was true\u2026",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45323",
-    "created_at": "2026-04-08T16:30:18Z",
-    "deletions": 66,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45323/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45323",
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45361",
+    "created_at": "2026-04-10T13:49:28Z",
+    "deletions": 13,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45361/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45361",
     "labels": [],
     "merged": false,
-    "number": 45323,
+    "number": 45361,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[CB] Fix capture of max_seqlen",
-    "updated_at": "2026-04-09T04:54:44Z"
+    "title": "Add CLIP-like models in conversion to VLMs",
+    "updated_at": "2026-04-13T12:16:59Z"
   },
   {
-    "additions": 20,
-    "author": "andrewor14",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "**Summary:** TorchAO recently deprecated AffineQuantizedTensor and related classes (pytorch/ao#2752). These will be removed in the next release. We should remove references of these classes in transformers before then. **Test Plan:** ``` p\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "hanouticelina",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? `huggingface-cli` is deprecated and no longer maintained. This PR updates the remaining references with `hf`",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45321",
-    "created_at": "2026-04-08T15:42:16Z",
-    "deletions": 29,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45360",
+    "created_at": "2026-04-10T11:59:37Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45321/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45321",
+    "files_url": "https://github.com/huggingface/transformers/pull/45360/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45360",
     "labels": [],
     "merged": false,
-    "number": 45321,
+    "number": 45360,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Remove references to torchao's AffineQuantizedTensor",
-    "updated_at": "2026-04-09T12:21:03Z"
+    "title": "Replace deprecated `huggingface-cli` references with `hf`",
+    "updated_at": "2026-04-10T12:13:18Z"
   },
   {
-    "additions": 5,
-    "author": "Regata3010",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a crash in assisted generation when using model pairs with different vocabulary sizes but the same tokenizer family (e.g., Qwen2.5-7B + Qwen2.5-0.5B). `map_input_embeddings` is only initialized when `len(self\u2026",
-    "changed_files": 1,
+    "additions": 3,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes #45356 ## Summary - Remove `kimi_k25` from `MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS`: its remote `TikTokenTokenizer` is the only correct backend \u2014 the model has no `tokenizer.json`, and its `added_tokens_decoder` has non-sequential\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45320",
-    "created_at": "2026-04-08T15:30:16Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45359",
+    "created_at": "2026-04-10T10:42:32Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45320/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45320",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45359/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45359",
+    "labels": [
+      "for patch"
+    ],
     "merged": true,
-    "number": 45320,
+    "number": 45359,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AttributeError in AssistantToTargetTranslator.unmap_input_ids with cross-vocab models",
-    "updated_at": "2026-04-10T17:46:37Z"
+    "title": "Fix Kimi-K2.5 tokenizer regression and _patch_mistral_regex AttributeError",
+    "updated_at": "2026-04-13T15:16:26Z"
   },
   {
-    "additions": 266,
-    "author": "tarekziade",
+    "additions": 111,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - Removes `HUGGINGFACE_CO_STAGING` when downloading artifacts - adds a retry mechanism for external URLs (with partial file cleanup)",
-    "changed_files": 13,
+    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/45357 finally. This was not catched in the previous fix, as the model can be reloaded correctly by `from_pretrained`, but keys are still wrongly serialized! Aft\u2026",
+    "changed_files": 24,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45319",
-    "created_at": "2026-04-08T14:51:48Z",
-    "deletions": 79,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45358",
+    "created_at": "2026-04-10T10:19:42Z",
+    "deletions": 42,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45319/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45319",
-    "labels": [],
-    "merged": false,
-    "number": 45319,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "fix: dont download artifacts from the test hub",
-    "updated_at": "2026-04-09T15:48:19Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/45358/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45358",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 45358,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix vlm weight mappings",
+    "updated_at": "2026-04-10T15:41:47Z"
   },
   {
-    "additions": 5,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? AutoTokenizer.register() adds classes to the global `REGISTERED_TOKENIZER_CLASSES` dict and some tests did not clean up behind them, leading to leaky state between tests",
-    "changed_files": 1,
+    "additions": 2297,
+    "author": "Shikhar-S",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR introduces [PhoneticXeus](https://arxiv.org/abs/2603.29042), which is the state-of-the-art universal phone recognizer trained on 70+ languages and evaluated on ~100 languages. The model should have high util\u2026",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45318",
-    "created_at": "2026-04-08T13:46:47Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45355",
+    "created_at": "2026-04-10T04:26:05Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45318/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45318",
-    "labels": [],
-    "merged": true,
-    "number": 45318,
+    "files_url": "https://github.com/huggingface/transformers/pull/45355/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45355",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 45355,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: leak in tokenizer registry for `test_processors`",
-    "updated_at": "2026-04-09T10:12:46Z"
+    "state": "open",
+    "title": "Add universal phone recognition model - PhoneticXeus",
+    "updated_at": "2026-04-14T15:02:02Z"
   },
   {
-    "additions": 24,
-    "author": "mohdfaour03",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #45081 ## Problem Loading a Mistral tokenizer with `fix_mistral_regex=True` crashes because `_patch_mistral_regex` receives a raw `tokenizers.Tokenizer` but tries to access `.backend_tokenizer.pre_tokenizer` on it \u2014 that attribute on\u2026",
-    "changed_files": 2,
+    "additions": 8,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? Gemma 4 was calculating the CE loss incorrectly and not handling gradient accumulation steps properly, leading to losses scaled up by the value of the gradient accumulation steps rather than letting the built in HF\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45317",
-    "created_at": "2026-04-08T13:38:46Z",
-    "deletions": 3,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45354",
+    "created_at": "2026-04-10T02:54:56Z",
+    "deletions": 76,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45317/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45317",
-    "labels": [],
-    "merged": false,
-    "number": 45317,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Fix AttributeError in _patch_mistral_regex when fix_mistral_regex=True  ",
-    "updated_at": "2026-04-09T13:52:30Z"
-  },
-  {
-    "additions": 9,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title and seems like there are no objections. Also added some colors in verbose logging cc @tarekziade @tomaarsen @yonigozlan if you have better ideas to style this (just tagging since you reacted \u2795 ) This is\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45316",
-    "created_at": "2026-04-08T13:01:15Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45316/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45316",
+    "files_url": "https://github.com/huggingface/transformers/pull/45354/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45354",
     "labels": [],
     "merged": true,
-    "number": 45316,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Logger has `[transformers]` prefix in non-verbose mode",
-    "updated_at": "2026-04-10T17:06:00Z"
-  },
-  {
-    "additions": 46,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Reusing a variable name meant that we returned a softmaxed value instead of the original logits in some MoE routers. This generally did not affect inference, but could affect the auxiliary loss on MoE logits in training when the coefficien\u2026",
-    "changed_files": 15,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45315",
-    "created_at": "2026-04-08T12:54:52Z",
-    "deletions": 30,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45315/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45315",
-    "labels": [],
-    "merged": false,
-    "number": 45315,
+    "number": 45354,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix softmaxing router logits",
-    "updated_at": "2026-04-10T13:25:20Z"
+    "title": "fix gemma4 gradient accumulation loss and last token incorrect labels",
+    "updated_at": "2026-04-10T10:08:12Z"
   },
   {
-    "additions": 18,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? fixes https://github.com/huggingface/transformers/issues/45216 and https://github.com/huggingface/transformers/issues/45310 and https://github.com/huggingface/transformers/issues/45313 TBH load-save-load works for t\u2026",
-    "changed_files": 10,
+    "additions": 30,
+    "author": "wilnn",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Add `**kwargs` to all methods in the `CallbackHandler` class. Previously, only the `CallbackHandler.on_push_begin()` method accepted `**kwargs`, while all other methods did not. This forces users who want to customi\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45314",
-    "created_at": "2026-04-08T11:54:53Z",
-    "deletions": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45353",
+    "created_at": "2026-04-09T23:14:20Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45314/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45314",
+    "files_url": "https://github.com/huggingface/transformers/pull/45353/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45353",
     "labels": [],
-    "merged": false,
-    "number": 45314,
+    "merged": true,
+    "number": 45353,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Conversion for LLM class loading with VLM ckpt ",
-    "updated_at": "2026-04-10T09:18:26Z"
+    "title": "add kwargs to all methods in the CallbackHandler class",
+    "updated_at": "2026-04-10T14:16:46Z"
   },
   {
-    "additions": 61,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. It was confirmed that the weight matrices of shared layers are NEVER used, and that kv states should ALWAYS be shared, even during training or inference without Cache. I will fully remove them on a\u2026",
-    "changed_files": 3,
+    "additions": 4,
+    "author": "RudrenduPaul",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Corrects an incorrect return type annotation on `Qwen3MoeSparseMoeBlock.forward`. The method is annotated as returning `tuple[torch.Tensor, torch.Tensor]` but actually returns a single reshaped `torch.Tensor` (see\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45312",
-    "created_at": "2026-04-08T11:33:33Z",
-    "deletions": 24,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45352",
+    "created_at": "2026-04-09T21:53:31Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45312/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45312",
+    "files_url": "https://github.com/huggingface/transformers/pull/45352/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45352",
     "labels": [],
     "merged": true,
-    "number": 45312,
+    "number": 45352,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[gemma4] Dissociate kv states sharing from the Cache",
-    "updated_at": "2026-04-09T08:08:07Z"
+    "title": "fix(qwen3_moe): correct return type annotation on Qwen3MoeSparseMoeBlock.forward",
+    "updated_at": "2026-04-13T14:07:30Z"
   },
   {
     "additions": 2,
-    "author": "KoichiYasuoka",
+    "author": "RudrenduPaul",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #45292 (seems to come from #41580) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by\u2026",
+    "body_excerpt": "## What does this PR do? Fixes a crash in `get_device_properties()` in `testing_utils.py` when CUDA is installed on the system but no GPU device is present (e.g., a CPU-only cloud studio with CUDA libraries installed). The function called\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45311",
-    "created_at": "2026-04-08T10:38:34Z",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45351",
+    "created_at": "2026-04-09T21:51:42Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45311/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45311",
+    "files_url": "https://github.com/huggingface/transformers/pull/45351/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45351",
     "labels": [],
     "merged": false,
-    "number": 45311,
+    "number": 45351,
     "review_comments_count": 0,
     "state": "open",
-    "title": "resize_token_embeddings does not effect to output_embeddings",
-    "updated_at": "2026-04-12T23:25:29Z"
+    "title": "fix(testing_utils): guard get_device_capability with torch.cuda.is_available()",
+    "updated_at": "2026-04-14T00:49:18Z"
   },
   {
-    "additions": 301,
-    "author": "agentspan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #45290. `ProcessorMixin.apply_chat_template` and several related code paths assumed every message in a conversation has a `content` key. Assistant messages with `tool_calls` and no textual content (a valid shape per the Op\u2026",
-    "changed_files": 9,
+    "additions": 1090,
+    "author": "mrutkows",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Support new Granite 4 vision arch. <!-- Remove if not applicable --> Fixes # (issue) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by c\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45309",
-    "created_at": "2026-04-08T08:40:08Z",
-    "deletions": 23,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45309/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45309",
-    "labels": [
-      "Code agent slop"
-    ],
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45350",
+    "created_at": "2026-04-09T17:46:37Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45350/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45350",
+    "labels": [],
     "merged": false,
-    "number": 45309,
+    "number": 45350,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix KeyError in apply_chat_template when message has no content (#45290)",
-    "updated_at": "2026-04-08T11:30:37Z"
+    "state": "open",
+    "title": "WIP: Add support for Granite4VisionForConditionalGeneration",
+    "updated_at": "2026-04-10T12:34:50Z"
   },
   {
-    "additions": 10,
-    "author": "juliabush",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #29942 Flash Attention 2 inference equivalence tests for Whisper can fail due to higher numerical variance compared to the eager attention implementation. This PR increases the tolerance (`atol`, `rtol`) spec\u2026",
-    "changed_files": 1,
+    "additions": 90,
+    "author": "florian6973",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #45305 Add a regression test in `TrainerGradientAccumulationTest` to avoid passing the GAS value to Accelerate by mistake Description: I force the value of the `num_steps` parameter to be 1, and the regression\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45303",
-    "created_at": "2026-04-07T21:37:00Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45349",
+    "created_at": "2026-04-09T17:24:39Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45303/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45303",
+    "files_url": "https://github.com/huggingface/transformers/pull/45349/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45349",
     "labels": [
-      "Code agent slop"
+      "for patch"
     ],
-    "merged": false,
-    "number": 45303,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 45349,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Fix FA2 inference equivalence failures for Whisper (closes #29942)",
-    "updated_at": "2026-04-08T14:42:36Z"
+    "title": "Fix #45305 + add regression test GAS",
+    "updated_at": "2026-04-13T14:41:43Z"
   },
   {
-    "additions": 7,
-    "author": "jagwar",
+    "additions": 50,
+    "author": "qgallouedec",
     "author_association": "MEMBER",
-    "body_excerpt": "## Security Fix Fixes a trust check bypass in `trl-ci-bot.yml` that allowed any GitHub user to trigger TRL CI on self-hosted GPU runners by commenting `/trl-ci` on any PR. ### The bug The \"Ignore untrusted commenter\" step used `exit 0`, wh\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Fixes #45290 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review and r\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45302",
-    "created_at": "2026-04-07T21:35:38Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45348",
+    "created_at": "2026-04-09T15:59:07Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45302/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45302",
+    "files_url": "https://github.com/huggingface/transformers/pull/45348/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45348",
     "labels": [],
     "merged": true,
-    "number": 45302,
-    "review_comments_count": 0,
+    "number": 45348,
+    "review_comments_count": 7,
     "state": "closed",
-    "title": "fix(security): prevent untrusted users from triggering TRL CI dispatch",
-    "updated_at": "2026-04-07T21:59:38Z"
+    "title": "Fix `apply_chat_template` crash on `tool_call` messages without content",
+    "updated_at": "2026-04-13T19:44:38Z"
   },
   {
-    "additions": 0,
-    "author": "sahildando",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 0,
+    "additions": 35,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. `accelerate` destroys the dict otherwise, if it's not BOTH passed as kwarg AND part of `_skip_keys_device_placement`.......... `per_layer_input` needs to stay as a positional arg, for gradient chec\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45301",
-    "created_at": "2026-04-07T21:12:29Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45347",
+    "created_at": "2026-04-09T15:31:34Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45301/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45301",
+    "files_url": "https://github.com/huggingface/transformers/pull/45347/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45347",
     "labels": [],
-    "merged": false,
-    "number": 45301,
+    "merged": true,
+    "number": 45347,
     "review_comments_count": 0,
     "state": "closed",
-    "title": " docs maintenance for transformers repository 979e8",
-    "updated_at": "2026-04-09T13:28:27Z"
+    "title": "[gemma4] Fix device map auto",
+    "updated_at": "2026-04-09T15:45:15Z"
   },
   {
-    "additions": 136,
-    "author": "w4nderlust",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Nemotron-H models use standalone MLP layers in their `hybrid_override_pattern` (the `-` character), but the config parser, validators, and modeling code only know about `mamba`/`attention`/`moe`. This means every Ne\u2026",
-    "changed_files": 5,
-    "cluster_id": null,
+    "additions": 16,
+    "author": "ionut-anghelina",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 2,
+    "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45300",
-    "created_at": "2026-04-07T20:57:45Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45346",
+    "created_at": "2026-04-09T14:48:28Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45300/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45300",
+    "files_url": "https://github.com/huggingface/transformers/pull/45346/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45346",
     "labels": [],
     "merged": false,
-    "number": 45300,
-    "review_comments_count": 0,
+    "number": 45346,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "Fix Nemotron-H: add mlp layer type support",
-    "updated_at": "2026-04-09T16:16:30Z"
+    "title": "Fix Double Application of Softmax for Router Logits in MoE models",
+    "updated_at": "2026-04-13T12:40:28Z"
   },
   {
-    "additions": 3,
-    "author": "partacc",
+    "additions": 30,
+    "author": "ansley",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Some CI security tests... I will contact you privately if there is anything worth reporting. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear i\u2026",
+    "body_excerpt": "The `transformers` V5 \"rm slow tokenizers\" refactor (\\#40936) aliased `LlamaTokenizerFast` to `LlamaTokenizer`, whose `__init__` unconditionally installs a SentencePiece Metaspace pre-tokenizer. This is correct for classic Llama/Llama-2 mo\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45345",
+    "created_at": "2026-04-09T14:31:40Z",
+    "deletions": 14,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45345/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45345",
+    "labels": [],
+    "merged": false,
+    "number": 45345,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix ByteLevel-BPE tokenizers silently breaking in `LlamaTokenizer`",
+    "updated_at": "2026-04-10T12:45:24Z"
+  },
+  {
+    "additions": 6,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Simple hook to display test duration. This will append inline duration per test during the run, example: ``` tests/utils/test_configuration_utils.py::ConfigPushToHubTester::test_push_to_hub [gw1] [ 90%] PASSED tests\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45299",
-    "created_at": "2026-04-07T20:07:02Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45344",
+    "created_at": "2026-04-09T14:22:46Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45299/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45299",
+    "files_url": "https://github.com/huggingface/transformers/pull/45344/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45344",
     "labels": [],
-    "merged": false,
-    "number": 45299,
+    "merged": true,
+    "number": 45344,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Please ignore] CI Test PR",
-    "updated_at": "2026-04-07T20:23:28Z"
+    "title": "refactor: display test duration",
+    "updated_at": "2026-04-09T15:19:26Z"
   },
   {
-    "additions": 2958,
-    "author": "DatLe203",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 13,
+    "additions": 8,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45298",
-    "created_at": "2026-04-07T19:39:50Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45342",
+    "created_at": "2026-04-09T14:13:15Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45298/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45298",
+    "files_url": "https://github.com/huggingface/transformers/pull/45342/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45342",
     "labels": [],
     "merged": false,
-    "number": 45298,
+    "number": 45342,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add new qwen2 5 vl",
-    "updated_at": "2026-04-07T20:00:43Z"
+    "state": "open",
+    "title": "Use `_keys_to_ignore_on_load_unexpected/missing` recursively from children",
+    "updated_at": "2026-04-09T14:23:31Z"
   },
   {
-    "additions": 21,
-    "author": "EhteshamSid",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes mutable default argument bugs in four quantization config `__init__` methods inside `quantization_config.py`. In Python, mutable objects used as default argument values (e.g. `=[]`, `={}`) are created once at\u2026",
-    "changed_files": 1,
+    "additions": 17,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/45314 with a better fix. Fixes https://github.com/huggingface/transformers/issues/45216 and https://github.com/huggingface/transformers/issues/45310 and ht\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45297",
-    "created_at": "2026-04-07T18:40:05Z",
-    "deletions": 7,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45340",
+    "created_at": "2026-04-09T12:02:14Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45297/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45297",
+    "files_url": "https://github.com/huggingface/transformers/pull/45340/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45340",
     "labels": [],
-    "merged": false,
-    "number": 45297,
+    "merged": true,
+    "number": 45340,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix mutable default arguments in quantization config classes",
-    "updated_at": "2026-04-09T14:00:25Z"
+    "title": "Fix conversion mappings for vlms",
+    "updated_at": "2026-04-10T10:18:09Z"
   },
   {
-    "additions": 191,
-    "author": "UsamaKenway",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 156,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The CircleCI config file is not ruff formatted, leading to unwanted changes when it's opened in an editor that follows our repository ruff configuration. This patch adds it and runs `make style` to update it",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45296",
-    "created_at": "2026-04-07T18:39:33Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45339",
+    "created_at": "2026-04-09T09:44:16Z",
+    "deletions": 58,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45296/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45296",
+    "files_url": "https://github.com/huggingface/transformers/pull/45339/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45339",
     "labels": [],
-    "merged": false,
-    "number": 45296,
-    "review_comments_count": 2,
-    "state": "open",
-    "title": "Add GGUF support to Gemma4 (31B & 26B-A4B) text ",
-    "updated_at": "2026-04-12T10:53:54Z"
+    "merged": true,
+    "number": 45339,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "chore: added circleci python script to ruff and ty checkers",
+    "updated_at": "2026-04-09T12:00:08Z"
   },
   {
-    "additions": 116,
-    "author": "jesperschlegel",
+    "additions": 37,
+    "author": "RudrenduPaul",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Adds the Gemma4ForSequenceClassification class which supports finetuning Gemma 4 models on sequence classification tasks.",
-    "changed_files": 5,
+    "body_excerpt": "Closes #45162 ## What this PR does Expands the docstrings of `_can_set_attn_implementation` and `_can_set_experts_implementation` in `modeling_utils.py` to explicitly document the known limitations of their source-inspection heuristic. **C\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45294",
-    "created_at": "2026-04-07T17:51:38Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45338",
+    "created_at": "2026-04-09T09:35:52Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45294/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45294",
+    "files_url": "https://github.com/huggingface/transformers/pull/45338/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45338",
     "labels": [],
     "merged": false,
-    "number": 45294,
+    "number": 45338,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "feat: add Gemma4ForSequenceClassification",
-    "updated_at": "2026-04-07T18:12:00Z"
+    "state": "closed",
+    "title": "docs: document known limitations of _can_set_attn/experts_implementation source inspection",
+    "updated_at": "2026-04-09T13:43:04Z"
   },
   {
-    "additions": 4,
-    "author": "yonigozlan",
+    "additions": 13,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "## Fix global state leak in `AutoTokenizer.register` causing test failures ### Problem `test_from_pretrained_dynamic_processor` was failing when run as part of the full test class with: ``` AttributeError: NewTokenizer has no attribute spe\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Removing test_hub from CI for now",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45293",
-    "created_at": "2026-04-07T16:29:25Z",
-    "deletions": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45337",
+    "created_at": "2026-04-09T08:54:45Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45293/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45293",
+    "files_url": "https://github.com/huggingface/transformers/pull/45337/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45337",
     "labels": [],
-    "merged": false,
-    "number": 45293,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Fix \"AttributeError: NewTokenizer has no attribute special_attribute_present\" (Remove `REGISTERED_FAST_ALIASES`)",
-    "updated_at": "2026-04-09T12:04:21Z"
+    "merged": true,
+    "number": 45337,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "chore: remove test_hub for now",
+    "updated_at": "2026-04-09T09:28:52Z"
   },
   {
-    "additions": 2958,
-    "author": "DatLe203",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 11,
+    "additions": 84,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Follow-up of https://github.com/huggingface/transformers/pull/45312. This removes the unnecessary weights, and silently skip them during loading, so that the checkpoints on the hub do not have to b\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45291",
-    "created_at": "2026-04-07T14:30:38Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45336",
+    "created_at": "2026-04-09T08:43:55Z",
+    "deletions": 26,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45291/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45291",
+    "files_url": "https://github.com/huggingface/transformers/pull/45336/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45336",
     "labels": [],
-    "merged": false,
-    "number": 45291,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 45336,
+    "review_comments_count": 7,
     "state": "closed",
-    "title": "First pull request",
-    "updated_at": "2026-04-07T14:53:52Z"
+    "title": "[gemma4] Remove all shared weights, and silently skip them during loading",
+    "updated_at": "2026-04-09T13:23:33Z"
   },
   {
-    "additions": 24,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45265 and doesn't warn if the value is an int.",
-    "changed_files": 5,
+    "additions": 1333,
+    "author": "kmswin1",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Add A.X K1 model architecture What does this PR do? This PR adds support for A.X K1, a large-scale Mixture-of-Experts (MoE) language model developed by [SK Telecom](https://huggingface.co/skt). A.X K1 contains 519B total parameters with 33\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45289",
-    "created_at": "2026-04-07T13:00:52Z",
-    "deletions": 73,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45289/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45289",
-    "labels": [],
-    "merged": false,
-    "number": 45289,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Less unnecessary RoPE warnings",
-    "updated_at": "2026-04-10T10:10:43Z"
-  },
-  {
-    "additions": 35,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45288",
-    "created_at": "2026-04-07T10:24:53Z",
-    "deletions": 11,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45288/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45288",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45334",
+    "created_at": "2026-04-09T06:21:43Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45334/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45334",
     "labels": [],
     "merged": false,
-    "number": 45288,
+    "number": 45334,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(cohere_asr): auto-fix failing tests",
-    "updated_at": "2026-04-10T10:41:27Z"
+    "title": "Feature/add axk1",
+    "updated_at": "2026-04-14T07:23:08Z"
   },
   {
-    "additions": 102,
-    "author": "kaixuanliu",
+    "additions": 471,
+    "author": "eladsegal",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Adds heterogeneous model support - the ability for individual layers to differ from the global config (e.g., different `intermediate_size`, `num_key_value_heads`) and to skip sub-modules entirely (MLP, attention, et\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45287",
-    "created_at": "2026-04-07T10:23:45Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45333",
+    "created_at": "2026-04-09T06:18:11Z",
     "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45287/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45287",
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45333/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45333",
     "labels": [],
     "merged": false,
-    "number": 45287,
+    "number": 45333,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(videomt): auto-fix failing tests",
-    "updated_at": "2026-04-10T10:41:30Z"
+    "state": "open",
+    "title": "Add heterogeneous config support (per-layer configuration)",
+    "updated_at": "2026-04-14T14:08:07Z"
   },
   {
-    "additions": 29,
-    "author": "kaixuanliu",
+    "additions": 2152,
+    "author": "eladsegal",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Adds heterogeneous model support - the ability for individual layers to differ from the global config (e.g., different `intermediate_size`, `num_key_value_heads`) and to skip sub-modules entirely (MLP, attention, et\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45286",
-    "created_at": "2026-04-07T10:22:51Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45286/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45286",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45332",
+    "created_at": "2026-04-09T05:56:31Z",
+    "deletions": 40,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45332/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45332",
     "labels": [],
     "merged": false,
-    "number": 45286,
+    "number": 45332,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(nomic_bert): auto-fix failing tests",
-    "updated_at": "2026-04-10T10:41:28Z"
+    "state": "open",
+    "title": "Add heterogeneous model support (per-layer config and modeling)",
+    "updated_at": "2026-04-14T14:03:01Z"
   },
   {
-    "additions": 153,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title",
-    "changed_files": 4,
+    "additions": 12,
+    "author": "Kash6",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "get_rope_index unconditionally applies tokens_per_second temporal scaling to both images and videos. For still images (modality_type == 1), this shifts the temporal position origin to start_position * tokens_per_second instead of start_pos\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45285",
-    "created_at": "2026-04-07T09:18:11Z",
-    "deletions": 329,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45330",
+    "created_at": "2026-04-08T23:51:52Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45285/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45285",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45330/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45330",
+    "labels": [
+      "for patch"
+    ],
     "merged": true,
-    "number": 45285,
-    "review_comments_count": 6,
+    "number": 45330,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix export for gemma4 and add Integration tests",
-    "updated_at": "2026-04-08T13:07:40Z"
+    "title": "Fix Qwen2.5-VL temporal RoPE scaling applied to still images",
+    "updated_at": "2026-04-14T03:21:32Z"
   },
   {
-    "additions": 4,
-    "author": "Abdennacer-Badaoui",
+    "additions": 152,
+    "author": "abidlabs",
     "author_association": "MEMBER",
-    "body_excerpt": "Following this PR : https://github.com/huggingface/transformers/pull/45268. This PR fix Qwen2 expectations for AMD.",
-    "changed_files": 1,
+    "body_excerpt": "Updates `TrackioCallback` and `TrainingArguments` for the latest version of Trackio using HF Buckets as the backend, and control over creating a static Space for the Trackio dashboard during or at the end of training. These are now the `Tr\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45284",
-    "created_at": "2026-04-07T09:12:01Z",
-    "deletions": 2,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45329",
+    "created_at": "2026-04-08T22:36:08Z",
+    "deletions": 57,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45284/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45284",
+    "files_url": "https://github.com/huggingface/transformers/pull/45329/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45329",
     "labels": [],
     "merged": true,
-    "number": 45284,
-    "review_comments_count": 0,
+    "number": 45329,
+    "review_comments_count": 21,
     "state": "closed",
-    "title": "[AMD CI] Fix Qwen2 expectations",
-    "updated_at": "2026-04-07T12:01:58Z"
+    "title": "Update `trackio` integration to use Buckets and \"freeze\" Space after training",
+    "updated_at": "2026-04-13T14:30:27Z"
   },
   {
-    "additions": 107,
-    "author": "jIab-b",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 9,
+    "author": "RyanMullins",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #45242 * Drops `k_proj`, `k_norm`, and `v_proj` weights for `Gemma4TextAttention` modules from the checkpoint if the layer shares KV cache values. These changes can also be adapted to Gemma 3n if that's desira\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45283",
-    "created_at": "2026-04-07T09:11:14Z",
-    "deletions": 0,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45328",
+    "created_at": "2026-04-08T20:43:42Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45283/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45283",
+    "files_url": "https://github.com/huggingface/transformers/pull/45328/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45328",
     "labels": [],
     "merged": false,
-    "number": 45283,
+    "number": 45328,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add Qwen3.5 GGUF loading support",
-    "updated_at": "2026-04-09T14:49:22Z"
+    "title": "Drop unused Gemma4TextAttention weights when sharing KV Cache",
+    "updated_at": "2026-04-09T18:31:13Z"
   },
   {
-    "additions": 31,
-    "author": "Abdennacer-Badaoui",
+    "additions": 283,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "`_register_model_output_pytree_node` was calling set.__contains__ during TorchDynamo tracing, which is unsupported in PyTorch 2.8.0 (ROCm). Added an early return when `torch.compiler.is_compiling()` is True, since pytree nodes are already\u2026",
+    "body_excerpt": "refactors the how to add a model with modular transformers doc: - structure: - flipped the order so you learn how to write the modular file first before generating it - remove the motivator examples with BERT/RoBERTa - merge the two `super\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45282",
-    "created_at": "2026-04-07T08:50:54Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45282/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45282",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45327",
+    "created_at": "2026-04-08T20:23:28Z",
+    "deletions": 403,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45327/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45327",
     "labels": [],
     "merged": false,
-    "number": 45282,
-    "review_comments_count": 3,
+    "number": 45327,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "[AMD CI] Fix torch.compile/export failures on AMD CI due to untraceable set.__contains__ ",
-    "updated_at": "2026-04-09T11:49:30Z"
+    "title": "[docs] modular transformers",
+    "updated_at": "2026-04-09T12:01:48Z"
   },
   {
-    "additions": 25,
-    "author": "zhang-prog",
+    "additions": 13,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### Description This PR fixes a boundary issue in the `_extract_polygon_points_by_masks` method of PP-DocLayoutV3. When running inference with a low confidence threshold, or due to coordinate clipping during scaling, the extracted `cropped\u2026",
-    "changed_files": 3,
+    "body_excerpt": "### What does this PR do? \u2192 This PR introduces compat fixes across several audio models to ensure they can be loaded and used by a companion vLLM PR. <ins>These changes are deliberate and are blocking</ins> [this vLLM PR](https://github.co\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45281",
-    "created_at": "2026-04-07T08:49:24Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45326",
+    "created_at": "2026-04-08T18:28:35Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45281/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45281",
+    "files_url": "https://github.com/huggingface/transformers/pull/45326/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45326",
     "labels": [],
-    "merged": true,
-    "number": 45281,
+    "merged": false,
+    "number": 45326,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix resize failure caused by zero-sized masks in PP-DocLayoutV3",
-    "updated_at": "2026-04-09T13:06:44Z"
+    "state": "open",
+    "title": "feat[vLLM \u00d7 v5]: Add vLLM compatibility for audio models",
+    "updated_at": "2026-04-14T15:06:53Z"
   },
   {
-    "additions": 2711,
-    "author": "marvinzh",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? add Qianfan-OCR model definition - `QianfanOCRForConditionalGeneration` - image-text to text model definition - `QianfanOCRModel` - backbone of image-text to text model without lm heads - `QianfanOCRProcessor` - tex\u2026",
-    "changed_files": 18,
+    "additions": 234,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45276 and https://github.com/huggingface/transformers/issues/45335 In gemma4 per-layer inputs have to be resized as long as they aren't part of soft multimoda\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45280",
-    "created_at": "2026-04-07T06:49:34Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45324",
+    "created_at": "2026-04-08T17:06:26Z",
+    "deletions": 53,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45280/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45280",
+    "files_url": "https://github.com/huggingface/transformers/pull/45324/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45324",
     "labels": [],
     "merged": false,
-    "number": 45280,
-    "review_comments_count": 30,
+    "number": 45324,
+    "review_comments_count": 5,
     "state": "open",
-    "title": "add Qianfan-OCR model definition",
-    "updated_at": "2026-04-13T03:56:15Z"
+    "title": "Gemma4 resizing per layer inputs",
+    "updated_at": "2026-04-13T09:14:48Z"
   },
   {
-    "additions": 43,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 3,
+    "additions": 128,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "# Summary This PR fixes the issue raised in https://github.com/huggingface/transformers/pull/45274 . CUDA graph reuse in continuous batching used (num_q_tokens, max_kv_read) as the graph cache key. However, FlashAttention varlen kernels al\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45279",
-    "created_at": "2026-04-07T06:40:35Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45279/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45279",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45323",
+    "created_at": "2026-04-08T16:30:18Z",
+    "deletions": 67,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45323/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45323",
     "labels": [],
     "merged": false,
-    "number": 45279,
-    "review_comments_count": 3,
+    "number": 45323,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "add expert parallelism for gemma-4-26B-A4B-it",
-    "updated_at": "2026-04-13T00:52:58Z"
+    "title": "[CB] Fix capture of max_seqlen",
+    "updated_at": "2026-04-14T12:37:26Z"
   },
   {
-    "additions": 4,
-    "author": "kamalrajkannan78",
+    "additions": 20,
+    "author": "andrewor14",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? - Gemma3ForConditionalGeneration.forward & Gemma3ForSequenceClassification.forward calls self.model() without return_dict=True, so @can_return_tuple silently converts the output to a plain tuple, causing outputs.pas\u2026",
+    "body_excerpt": "**Summary:** TorchAO recently deprecated AffineQuantizedTensor and related classes (pytorch/ao#2752). These will be removed in the next release. We should remove references of these classes in transformers before then. **Test Plan:** ``` p\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45277",
-    "created_at": "2026-04-07T05:24:43Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45321",
+    "created_at": "2026-04-08T15:42:16Z",
+    "deletions": 29,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45277/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45277",
+    "files_url": "https://github.com/huggingface/transformers/pull/45321/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45321",
     "labels": [],
-    "merged": true,
-    "number": 45277,
+    "merged": false,
+    "number": 45321,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix AttributeError in Gemma3ForConditionalGeneration and Gemma3ForSequenceClassification when config.return_dict=False",
-    "updated_at": "2026-04-10T10:37:16Z"
+    "state": "open",
+    "title": "Remove references to torchao's AffineQuantizedTensor",
+    "updated_at": "2026-04-09T12:21:03Z"
   },
   {
-    "additions": 9,
-    "author": "avarga1",
-    "author_association": "NONE",
-    "body_excerpt": "## Problem `AutoConfig.from_pretrained(\"baidu/ERNIE-4.5-VL-28B-A3B-Paddle\", trust_remote_code=True)` raises errors that prevent the model from loading at all. Three separate bugs compound each other: ### Bug 1 \u2014 `model_type` mismatch (KeyE\u2026",
-    "changed_files": 3,
+    "additions": 5,
+    "author": "Regata3010",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a crash in assisted generation when using model pairs with different vocabulary sizes but the same tokenizer family (e.g., Qwen2.5-7B + Qwen2.5-0.5B). `map_input_embeddings` is only initialized when `len(self\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45275",
-    "created_at": "2026-04-07T02:22:26Z",
-    "deletions": 5,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45320",
+    "created_at": "2026-04-08T15:30:16Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45275/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45275",
+    "files_url": "https://github.com/huggingface/transformers/pull/45320/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45320",
     "labels": [],
-    "merged": false,
-    "number": 45275,
-    "review_comments_count": 6,
+    "merged": true,
+    "number": 45320,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(ernie4_5_vl_moe): resolve three config loading failures for ERNIE-4.5-VL MoE models",
-    "updated_at": "2026-04-09T16:57:22Z"
+    "title": "Fix AttributeError in AssistantToTargetTranslator.unmap_input_ids with cross-vocab models",
+    "updated_at": "2026-04-10T17:46:37Z"
   },
   {
-    "additions": 215,
-    "author": "Qubitium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? 1. Fix: CUDA graph reuse for FA2 continuous batching was wrongly keyed causing quality collapse for specific configuration CUDA graph reuse used the wrong key: replay reuse depended on padded tensor sizes, but FA va\u2026",
-    "changed_files": 5,
+    "additions": 266,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - Removes `HUGGINGFACE_CO_STAGING` when downloading artifacts - adds a retry mechanism for external URLs (with partial file cleanup)",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45274",
-    "created_at": "2026-04-07T01:43:12Z",
-    "deletions": 36,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45319",
+    "created_at": "2026-04-08T14:51:48Z",
+    "deletions": 79,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45274/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45274",
+    "files_url": "https://github.com/huggingface/transformers/pull/45319/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45319",
     "labels": [],
     "merged": false,
-    "number": 45274,
-    "review_comments_count": 0,
+    "number": 45319,
+    "review_comments_count": 3,
     "state": "open",
-    "title": "Fix CB Accuracy Regression under FA2",
-    "updated_at": "2026-04-09T03:47:47Z"
+    "title": "fix: dont download artifacts from the test hub",
+    "updated_at": "2026-04-09T15:48:19Z"
   },
   {
-    "additions": 2,
-    "author": "excepshenal",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Liger kernel unnecessarily materializes logits in VRAM during eval with `prediction_loss_only=True`, causing OOM. We explicitly tell Liger to `skip_logits`. <!-- Congratulations! You've made it this far! You're not\u2026",
+    "additions": 5,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? AutoTokenizer.register() adds classes to the global `REGISTERED_TOKENIZER_CLASSES` dict and some tests did not clean up behind them, leading to leaky state between tests",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45273",
-    "created_at": "2026-04-06T21:11:21Z",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45318",
+    "created_at": "2026-04-08T13:46:47Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45273/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45273",
+    "files_url": "https://github.com/huggingface/transformers/pull/45318/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45318",
     "labels": [],
-    "merged": false,
-    "number": 45273,
+    "merged": true,
+    "number": 45318,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix: liger unnecessarily materializes logits in VRAM during eval, causing OOM",
-    "updated_at": "2026-04-09T15:32:29Z"
+    "state": "closed",
+    "title": "fix: leak in tokenizer registry for `test_processors`",
+    "updated_at": "2026-04-09T10:12:46Z"
   },
   {
-    "additions": 6,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "Cc @ydshieh",
-    "changed_files": 1,
+    "additions": 24,
+    "author": "mohdfaour03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #45081 ## Problem Loading a Mistral tokenizer with `fix_mistral_regex=True` crashes because `_patch_mistral_regex` receives a raw `tokenizers.Tokenizer` but tries to access `.backend_tokenizer.pre_tokenizer` on it \u2014 that attribute on\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45272",
-    "created_at": "2026-04-06T20:39:29Z",
-    "deletions": 20,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45317",
+    "created_at": "2026-04-08T13:38:46Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45272/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45272",
+    "files_url": "https://github.com/huggingface/transformers/pull/45317/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45317",
     "labels": [],
     "merged": false,
-    "number": 45272,
-    "review_comments_count": 0,
+    "number": 45317,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "Fix redundant logic in video processing SmolVLM",
-    "updated_at": "2026-04-06T20:49:13Z"
+    "title": "Fix AttributeError in _patch_mistral_regex when fix_mistral_regex=True  ",
+    "updated_at": "2026-04-09T13:52:30Z"
   },
   {
-    "additions": 90,
-    "author": "stevhliu",
+    "additions": 9,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "adds a separate vlm contribution doc for more visibility instead of being hidden in the Contribute to Transformers doc, and integration tests are covered in #45152",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? As per title and seems like there are no objections. Also added some colors in verbose logging cc @tarekziade @tomaarsen @yonigozlan if you have better ideas to style this (just tagging since you reacted \u2795 ) This is\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45271",
-    "created_at": "2026-04-06T18:56:42Z",
-    "deletions": 0,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45316",
+    "created_at": "2026-04-08T13:01:15Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45271/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45271",
+    "files_url": "https://github.com/huggingface/transformers/pull/45316/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45316",
     "labels": [],
-    "merged": false,
-    "number": 45271,
-    "review_comments_count": 5,
-    "state": "open",
-    "title": "[docs] vlm addition",
-    "updated_at": "2026-04-07T18:22:18Z"
+    "merged": true,
+    "number": 45316,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Logger has `[transformers]` prefix in non-verbose mode",
+    "updated_at": "2026-04-14T14:08:04Z"
   },
   {
-    "additions": 162,
-    "author": "madhav1k",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Introduce logging of individual loss components when models return a dict of losses. - Add TrainingArguments.logging_loss_components flag to enable/disable this behavior. - Track per-component running sums with _tr_loss_components and aggr\u2026",
-    "changed_files": 3,
+    "additions": 46,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Reusing a variable name meant that we returned a softmaxed value instead of the original logits in some MoE routers. This generally did not affect inference, but could affect the auxiliary loss on MoE logits in training when the coefficien\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45270",
-    "created_at": "2026-04-06T18:38:51Z",
-    "deletions": 7,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45270/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45270",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45315",
+    "created_at": "2026-04-08T12:54:52Z",
+    "deletions": 30,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45315/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45315",
     "labels": [],
     "merged": false,
-    "number": 45270,
+    "number": 45315,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[Trainer] Support multi-loss component logging",
-    "updated_at": "2026-04-13T08:30:40Z"
+    "state": "closed",
+    "title": "Fix softmaxing router logits",
+    "updated_at": "2026-04-10T13:25:20Z"
   },
   {
-    "additions": 3,
-    "author": "ryota-komatsu",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix minor typos in `src/transformers/utils/output_capturing.py`: - `install_output_capuring_hook` \u2192 `install_output_capturing_hook` (lines 98, 147) - `Tis` \u2192 `This` (line 152) ## Before submitting - [x] This PR fixe\u2026",
-    "changed_files": 1,
+    "additions": 18,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? fixes https://github.com/huggingface/transformers/issues/45216 and https://github.com/huggingface/transformers/issues/45310 and https://github.com/huggingface/transformers/issues/45313 TBH load-save-load works for t\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45269",
-    "created_at": "2026-04-06T18:01:42Z",
-    "deletions": 3,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45269/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45269",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45314",
+    "created_at": "2026-04-08T11:54:53Z",
+    "deletions": 27,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45314/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45314",
     "labels": [],
     "merged": false,
-    "number": 45269,
+    "number": 45314,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix typos in src/transformers/utils/output_capturing.py",
-    "updated_at": "2026-04-06T20:49:06Z"
+    "state": "closed",
+    "title": "Conversion for LLM class loading with VLM ckpt ",
+    "updated_at": "2026-04-10T09:18:26Z"
   },
   {
-    "additions": 3,
-    "author": "ydshieh",
+    "additions": 61,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix `Qwen2IntegrationTest`: - `test_speculative_generation`: - `0c89522f`: #43794 changed seed, so the actual output changed, but the expected output is not updated (cc @tarekziade more attention next time \ud83d\ude04 ) - `af\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. It was confirmed that the weight matrices of shared layers are NEVER used, and that kv states should ALWAYS be shared, even during training or inference without Cache. I will fully remove them on a\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45268",
-    "created_at": "2026-04-06T18:01:38Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45312",
+    "created_at": "2026-04-08T11:33:33Z",
+    "deletions": 24,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45268/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45268",
+    "files_url": "https://github.com/huggingface/transformers/pull/45312/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45312",
     "labels": [],
     "merged": true,
-    "number": 45268,
-    "review_comments_count": 2,
+    "number": 45312,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `Qwen2IntegrationTest`",
-    "updated_at": "2026-04-08T07:56:35Z"
+    "title": "[gemma4] Dissociate kv states sharing from the Cache",
+    "updated_at": "2026-04-09T08:08:07Z"
   },
   {
-    "additions": 108,
-    "author": "KetanP1618",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds a missing docstring to the `FFN.forward` method in DistilBERT. The `FFN.forward` method in `modeling_distilbert.py` had no documentation at all. This PR adds proper Args and Returns sections following the exis\u2026",
+    "additions": 2,
+    "author": "KoichiYasuoka",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #45292 (seems to come from #41580) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45267",
-    "created_at": "2026-04-06T17:59:13Z",
-    "deletions": 39,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45311",
+    "created_at": "2026-04-08T10:38:34Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45267/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45267",
+    "files_url": "https://github.com/huggingface/transformers/pull/45311/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45311",
     "labels": [],
     "merged": false,
-    "number": 45267,
+    "number": 45311,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add docstring to FFN.forward in DistilBERT",
-    "updated_at": "2026-04-08T13:43:57Z"
+    "title": "resize_token_embeddings does not effect to output_embeddings",
+    "updated_at": "2026-04-13T13:55:03Z"
   },
   {
-    "additions": 125,
-    "author": "KetanP1618",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds missing docstrings to two undocumented `forward` methods in the ALBERT model: - `AlbertMLMHead.forward` - Added Args and Returns sections - `AlbertSOPHead.forward` - Added Args and Returns sections Both method\u2026",
-    "changed_files": 1,
+    "additions": 301,
+    "author": "agentspan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #45290. `ProcessorMixin.apply_chat_template` and several related code paths assumed every message in a conversation has a `content` key. Assistant messages with `tool_calls` and no textual content (a valid shape per the Op\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45266",
-    "created_at": "2026-04-06T17:41:14Z",
-    "deletions": 41,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45309",
+    "created_at": "2026-04-08T08:40:08Z",
+    "deletions": 23,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45266/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45266",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45309/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45309",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 45266,
+    "number": 45309,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add docstrings to AlbertMLMHead and AlbertSOPHead forward methods",
-    "updated_at": "2026-04-08T13:43:34Z"
+    "title": "Fix KeyError in apply_chat_template when message has no content (#45290)",
+    "updated_at": "2026-04-08T11:30:37Z"
   },
   {
-    "additions": 1,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? **Fix tf32 issue: set `torch.backends.cudnn.conv.fp32_precision` explicitly.** (#45248) breaks when running on torch 2.8 or older ...",
+    "additions": 10,
+    "author": "juliabush",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #29942 Flash Attention 2 inference equivalence tests for Whisper can fail due to higher numerical variance compared to the eager attention implementation. This PR increases the tolerance (`atol`, `rtol`) spec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45263",
-    "created_at": "2026-04-06T10:06:40Z",
-    "deletions": 1,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45303",
+    "created_at": "2026-04-07T21:37:00Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45263/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45263",
-    "labels": [],
-    "merged": true,
-    "number": 45263,
+    "files_url": "https://github.com/huggingface/transformers/pull/45303/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45303",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 45303,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add `hasattr(torch.backends.cudnn, \"conv\")` to `conftest.py`",
-    "updated_at": "2026-04-06T19:46:18Z"
+    "title": "Fix FA2 inference equivalence failures for Whisper (closes #29942)",
+    "updated_at": "2026-04-08T14:42:36Z"
   },
   {
-    "additions": 1,
-    "author": "lowzhao",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix docstring spelling mistake TokenizersBackend.convert_to_native_format. ```python @classmethod def convert_to_native_format(cls, trust_remote_code=False, **kwargs): \"\"\"s <---- additional s ``` Likely caused by mi\u2026",
+    "additions": 7,
+    "author": "jagwar",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Security Fix Fixes a trust check bypass in `trl-ci-bot.yml` that allowed any GitHub user to trigger TRL CI on self-hosted GPU runners by commenting `/trl-ci` on any PR. ### The bug The \"Ignore untrusted commenter\" step used `exit 0`, wh\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45262",
-    "created_at": "2026-04-06T08:41:40Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45302",
+    "created_at": "2026-04-07T21:35:38Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45262/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45262",
+    "files_url": "https://github.com/huggingface/transformers/pull/45302/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45302",
     "labels": [],
     "merged": true,
-    "number": 45262,
+    "number": 45302,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "doc: fix TokenizersBackend.convert_to_native_format docstring",
-    "updated_at": "2026-04-06T16:32:44Z"
+    "title": "fix(security): prevent untrusted users from triggering TRL CI dispatch",
+    "updated_at": "2026-04-07T21:59:38Z"
   },
   {
-    "additions": 22,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What this PR does It's working, see https://github.com/huggingface/transformers/actions/runs/24025915210 (the failing job in this PR is because the workflow needs to be on `main` to be effective).",
-    "changed_files": 1,
+    "additions": 0,
+    "author": "sahildando",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45261",
-    "created_at": "2026-04-06T07:52:29Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45301",
+    "created_at": "2026-04-07T21:12:29Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45261/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45261",
+    "files_url": "https://github.com/huggingface/transformers/pull/45301/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45301",
     "labels": [],
-    "merged": true,
-    "number": 45261,
+    "merged": false,
+    "number": 45301,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "empty",
-    "updated_at": "2026-04-06T09:24:59Z"
+    "title": " docs maintenance for transformers repository 979e8",
+    "updated_at": "2026-04-09T13:28:27Z"
   },
   {
-    "additions": 18,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary PR #43514 refactored `_preprocess` to pass `resample=resample` to `resize`, but the `resize` method in `SmolVLMVideoProcessor` still had `interpolation` as its parameter name. The `resample` kwarg was silently swallowed by `**kw\u2026",
-    "changed_files": 2,
+    "additions": 136,
+    "author": "w4nderlust",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Nemotron-H models use standalone MLP layers in their `hybrid_override_pattern` (the `-` character), but the config parser, validators, and modeling code only know about `mamba`/`attention`/`moe`. This means every Ne\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45258",
-    "created_at": "2026-04-06T05:09:09Z",
-    "deletions": 7,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45300",
+    "created_at": "2026-04-07T20:57:45Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45258/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45258",
+    "files_url": "https://github.com/huggingface/transformers/pull/45300/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45300",
     "labels": [],
-    "merged": true,
-    "number": 45258,
-    "review_comments_count": 7,
-    "state": "closed",
-    "title": "Fix `SmolVLM` video processor `resize` using wrong interpolation after backend refactor",
-    "updated_at": "2026-04-06T20:40:22Z"
+    "merged": false,
+    "number": 45300,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix Nemotron-H: add mlp layer type support",
+    "updated_at": "2026-04-09T16:16:30Z"
   },
   {
-    "additions": 269,
-    "author": "lucianommartins",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# [Gemma4] Fix chat template and stop tokens for OpenAI tool calling compatibility ## What does this PR do? Rewrites the `_patch_template_for_openai_tool_role()` function in `convert_gemma4_weights.py` to fully support OpenAI Chat Completi\u2026",
+    "additions": 3,
+    "author": "partacc",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Some CI security tests... I will contact you privately if there is anything worth reporting. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear i\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45257",
-    "created_at": "2026-04-05T22:07:53Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45257/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45257",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45299",
+    "created_at": "2026-04-07T20:07:02Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45299/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45299",
     "labels": [],
     "merged": false,
-    "number": 45257,
+    "number": 45299,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Gemma4] Fix chat template and stop tokens for OpenAI tool calling compatibility",
-    "updated_at": "2026-04-10T13:28:04Z"
+    "title": "[Please ignore] CI Test PR",
+    "updated_at": "2026-04-07T20:23:28Z"
   },
   {
-    "additions": 39,
-    "author": "zozo123",
+    "additions": 2958,
+    "author": "DatLe203",
     "author_association": "NONE",
-    "body_excerpt": "## Summary When saving a Qwen3.5 VL model via `save_pretrained`, the `revert_weight_conversion` for `qwen3_5_text` replaces a leading `model.` segment. This wrongly matches keys that already start with `model.language_model.` on composite\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45256",
-    "created_at": "2026-04-05T19:00:26Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45298",
+    "created_at": "2026-04-07T19:39:50Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45256/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45256",
+    "files_url": "https://github.com/huggingface/transformers/pull/45298/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45298",
     "labels": [],
     "merged": false,
-    "number": 45256,
+    "number": 45298,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: skip qwen3_5_text checkpoint remap for nested VL language_model",
-    "updated_at": "2026-04-06T18:50:06Z"
+    "title": "Add new qwen2 5 vl",
+    "updated_at": "2026-04-07T20:00:43Z"
   },
   {
-    "additions": 0,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - `tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_model_450m_logits`: - failed due to 6217adc6c8f0be7b5374e6a46129ad2214e4c6ed - `tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForCondi\u2026",
-    "changed_files": 0,
+    "additions": 21,
+    "author": "EhteshamSid",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes mutable default argument bugs in four quantization config `__init__` methods inside `quantization_config.py`. In Python, mutable objects used as default argument values (e.g. `=[]`, `={}`) are created once at\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45254",
-    "created_at": "2026-04-05T18:20:02Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45297",
+    "created_at": "2026-04-07T18:40:05Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45254/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45254",
+    "files_url": "https://github.com/huggingface/transformers/pull/45297/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45297",
     "labels": [],
     "merged": false,
-    "number": 45254,
+    "number": 45297,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix more integration tests for important models",
-    "updated_at": "2026-04-06T05:23:46Z"
+    "state": "closed",
+    "title": "Fix mutable default arguments in quantization config classes",
+    "updated_at": "2026-04-09T14:00:25Z"
   },
   {
-    "additions": 33,
-    "author": "Charly21r",
+    "additions": 191,
+    "author": "UsamaKenway",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a bug where `use_cache=False` produces garbage logits in Gemma 4 models due to broken KV sharing between layers. Fixes #45242 ## Root cause of the issue Gemma 4 introduces two architectural features not presen\u2026",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45253",
-    "created_at": "2026-04-05T18:12:08Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45296",
+    "created_at": "2026-04-07T18:39:33Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45253/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45253",
+    "files_url": "https://github.com/huggingface/transformers/pull/45296/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45296",
     "labels": [],
     "merged": false,
-    "number": 45253,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix Gemma4 `use_cache=False` producing bad logits",
-    "updated_at": "2026-04-09T08:17:08Z"
+    "number": 45296,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Add GGUF support to Gemma4 (31B & 26B-A4B) text ",
+    "updated_at": "2026-04-12T10:53:54Z"
   },
   {
-    "additions": 4,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? #43166 used `torch.set_float32_matmul_precision(\"high\")` which causes (likely) TF32 being used > \u201chigh\u201d, float32 matrix multiplications either use the TensorFloat32 datatype (10 mantissa bits explicitly stored) or t\u2026",
-    "changed_files": 1,
+    "additions": 116,
+    "author": "jesperschlegel",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Adds the Gemma4ForSequenceClassification class which supports finetuning Gemma 4 models on sequence classification tasks.",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45252",
-    "created_at": "2026-04-05T16:51:29Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45294",
+    "created_at": "2026-04-07T17:51:38Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45252/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45252",
+    "files_url": "https://github.com/huggingface/transformers/pull/45294/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45294",
     "labels": [],
-    "merged": true,
-    "number": 45252,
+    "merged": false,
+    "number": 45294,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix unexpected TF32 being enabled in testing",
-    "updated_at": "2026-04-07T10:14:50Z"
+    "state": "open",
+    "title": "feat: add Gemma4ForSequenceClassification",
+    "updated_at": "2026-04-13T13:05:41Z"
   },
   {
-    "additions": 91,
-    "author": "balgaly",
-    "author_association": "NONE",
-    "body_excerpt": "## Problem `torch.multinomial` rejects last dimensions >= 2**24. Beam search with `do_sample=True` builds a flat distribution of size `num_beams * vocab_size`, which can exceed that limit (e.g. large beams + ~164k vocab), crashing during g\u2026",
+    "additions": 4,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Fix global state leak in `AutoTokenizer.register` causing test failures ### Problem `test_from_pretrained_dynamic_processor` was failing when run as part of the full test class with: ``` AttributeError: NewTokenizer has no attribute spe\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45251",
-    "created_at": "2026-04-05T15:38:58Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45293",
+    "created_at": "2026-04-07T16:29:25Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45251/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45251",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45293/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45293",
+    "labels": [],
     "merged": false,
-    "number": 45251,
+    "number": 45293,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Fix \"AttributeError: NewTokenizer has no attribute special_attribute_present\" (Remove `REGISTERED_FAST_ALIASES`)",
+    "updated_at": "2026-04-09T12:04:21Z"
+  },
+  {
+    "additions": 2958,
+    "author": "DatLe203",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45291",
+    "created_at": "2026-04-07T14:30:38Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45291/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45291",
+    "labels": [],
+    "merged": false,
+    "number": 45291,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(generation): beam sample when num_beams * vocab_size exceeds multinomial limit",
-    "updated_at": "2026-04-08T14:50:40Z"
+    "title": "First pull request",
+    "updated_at": "2026-04-07T14:53:52Z"
   },
   {
-    "additions": 16,
-    "author": "ydshieh",
+    "additions": 24,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? PR #42428 change the way to enable / disable torch's TF32 using torch new API. It turns out set > torch.backends.fp32_precision = False would still have > torch.backends.cudnn.conv.fp32_precision = \"tf32\" > torch.ba\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45265 and doesn't warn if the value is an int.",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45248",
-    "created_at": "2026-04-05T07:51:44Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45289",
+    "created_at": "2026-04-07T13:00:52Z",
+    "deletions": 73,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45248/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45248",
+    "files_url": "https://github.com/huggingface/transformers/pull/45289/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45289",
     "labels": [],
     "merged": true,
-    "number": 45248,
-    "review_comments_count": 2,
+    "number": 45289,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix tf32 issue: set `torch.backends.cudnn.conv.fp32_precision` explicitly.",
-    "updated_at": "2026-04-10T21:48:56Z"
+    "title": "Less unnecessary RoPE warnings",
+    "updated_at": "2026-04-13T13:14:41Z"
   },
   {
-    "additions": 3,
-    "author": "gagandhakrey",
-    "author_association": "NONE",
-    "body_excerpt": "\u2026 validation # What does this PR do? Problem The invert_attention_mask function in src/transformers/modeling_utils.py crashed with an UnboundLocalError when given an encoder_attention_mask shape that wasn't exactly 2D or 3D. Because it onl\u2026",
+    "additions": 35,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45247",
-    "created_at": "2026-04-05T03:45:23Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45247/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45247",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45288",
+    "created_at": "2026-04-07T10:24:53Z",
+    "deletions": 11,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45288/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45288",
     "labels": [],
     "merged": false,
-    "number": 45247,
-    "review_comments_count": 1,
+    "number": 45288,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix UnboundLocalError in invert_attention_mask by adding proper shape\u2026",
-    "updated_at": "2026-04-09T14:11:43Z"
+    "title": "fix(cohere_asr): auto-fix failing tests",
+    "updated_at": "2026-04-10T10:41:27Z"
   },
   {
-    "additions": 0,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Let's CI go great!!!!",
-    "changed_files": 0,
+    "additions": 102,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45244",
-    "created_at": "2026-04-04T18:52:19Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45287",
+    "created_at": "2026-04-07T10:23:45Z",
     "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45244/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45244",
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45287/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45287",
     "labels": [],
     "merged": false,
-    "number": 45244,
+    "number": 45287,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Let's CI go great",
-    "updated_at": "2026-04-04T19:59:28Z"
+    "state": "closed",
+    "title": "fix(videomt): auto-fix failing tests",
+    "updated_at": "2026-04-10T10:41:30Z"
   },
   {
-    "additions": 2,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Use torch 2.11 for our (daily) CI since it's released for 2 weeks already. For CircleCI, we need to fix something regarding `torchvision.io.read_video`. For daily CI, torch 2.11 doesn't cause issues (for those `torc\u2026",
+    "additions": 29,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45243",
-    "created_at": "2026-04-04T18:09:27Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45243/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45243",
-    "labels": [],
-    "merged": true,
-    "number": 45243,
-    "review_comments_count": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45286",
+    "created_at": "2026-04-07T10:22:51Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45286/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45286",
+    "labels": [],
+    "merged": false,
+    "number": 45286,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Nvidia CI with `torch 2.11`",
-    "updated_at": "2026-04-04T18:48:45Z"
+    "title": "fix(nomic_bert): auto-fix failing tests",
+    "updated_at": "2026-04-10T10:41:28Z"
   },
   {
-    "additions": 523,
-    "author": "ydshieh",
+    "additions": 153,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? After the series of fixes in other previous PRs, we can now update the tiny model creation script. This update makes the script running without any failure, just 10 warnings. There are many # TODO, some of them may\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? As per the title",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45241",
-    "created_at": "2026-04-04T12:30:35Z",
-    "deletions": 164,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45285",
+    "created_at": "2026-04-07T09:18:11Z",
+    "deletions": 329,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45241/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45241",
+    "files_url": "https://github.com/huggingface/transformers/pull/45285/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45285",
     "labels": [],
     "merged": true,
-    "number": 45241,
-    "review_comments_count": 0,
+    "number": 45285,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Update tiny model creation script",
-    "updated_at": "2026-04-04T17:19:34Z"
+    "title": "Fix export for gemma4 and add Integration tests",
+    "updated_at": "2026-04-08T13:07:40Z"
   },
   {
-    "additions": 3,
-    "author": "shhKnight30",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Type checking for `PreTrainedConfig` subclasses broke in v5.4.0 and this fixes it. The culprit is `wrap_init_to_accept_kwargs` \u2014 it swaps out the dataclass-generated `__init__` with a `(**kwargs: Any)` wrapper at r\u2026",
+    "additions": 4,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "Following this PR : https://github.com/huggingface/transformers/pull/45268. This PR fix Qwen2 expectations for AMD.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45240",
-    "created_at": "2026-04-04T10:29:57Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45284",
+    "created_at": "2026-04-07T09:12:01Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45240/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45240",
+    "files_url": "https://github.com/huggingface/transformers/pull/45284/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45284",
     "labels": [],
     "merged": true,
-    "number": 45240,
+    "number": 45284,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: restore mypy type checking for PreTrainedConfig subclasses (#45071)",
-    "updated_at": "2026-04-10T11:10:27Z"
+    "title": "[AMD CI] Fix Qwen2 expectations",
+    "updated_at": "2026-04-07T12:01:58Z"
   },
   {
-    "additions": 28,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? We have introduced `CausalLMModelTest` for some time, but haven't update `get_test_info.py` accordingly, which causes some issues, in particularly for tiny model creation, regarding the part of the attribute `all_mo\u2026",
-    "changed_files": 1,
+    "additions": 107,
+    "author": "jIab-b",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45238",
-    "created_at": "2026-04-04T07:25:15Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45283",
+    "created_at": "2026-04-07T09:11:14Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45238/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45238",
+    "files_url": "https://github.com/huggingface/transformers/pull/45283/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45283",
     "labels": [],
-    "merged": true,
-    "number": 45238,
+    "merged": false,
+    "number": 45283,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Update `get_test_info.py` (related to tiny model creation)",
-    "updated_at": "2026-04-04T07:40:21Z"
+    "state": "open",
+    "title": "Add Qwen3.5 GGUF loading support",
+    "updated_at": "2026-04-09T14:49:22Z"
   },
   {
-    "additions": 2,
-    "author": "KoichiYasuoka",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes the bug in `resize_token_embeddings` (may occur python3.10 or after). Quick reproduce: ``` from transformers import AutoModelForMaskedLM mdl = AutoModelForMaskedLM.from_pretrained(\"bert-base-uncased\") f = mdl.\u2026",
-    "changed_files": 1,
+    "additions": 30,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "`_register_model_output_pytree_node` was calling set.__contains__ during TorchDynamo tracing, which is unsupported in PyTorch 2.8.0 (ROCm). Added an early return when `torch.compiler.is_compiling()` is True, since pytree nodes are already\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45236",
-    "created_at": "2026-04-04T07:05:53Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45282",
+    "created_at": "2026-04-07T08:50:54Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45236/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45236",
+    "files_url": "https://github.com/huggingface/transformers/pull/45282/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45282",
     "labels": [],
-    "merged": false,
-    "number": 45236,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 45282,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "resize_token_embeddings does not resize lm_head",
-    "updated_at": "2026-04-07T00:55:39Z"
+    "title": "[AMD CI] Fix torch.compile/export failures on AMD CI due to untraceable set.__contains__ ",
+    "updated_at": "2026-04-13T15:59:55Z"
   },
   {
-    "additions": 102,
-    "author": "kallewoof",
+    "additions": 25,
+    "author": "zhang-prog",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds a tiny \"agnostic.gpu\" utility that is meant to allow easy replacing of unnecessarily hard-coded vendor-specific code. The code does not use `torch.accelerator` as it is still considered experimental, bu\u2026",
-    "changed_files": 8,
+    "body_excerpt": "### Description This PR fixes a boundary issue in the `_extract_polygon_points_by_masks` method of PP-DocLayoutV3. When running inference with a low confidence threshold, or due to coordinate clipping during scaling, the extracted `cropped\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45235",
-    "created_at": "2026-04-04T06:08:22Z",
-    "deletions": 13,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45235/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45235",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45281",
+    "created_at": "2026-04-07T08:49:24Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45281/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45281",
     "labels": [],
-    "merged": false,
-    "number": 45235,
+    "merged": true,
+    "number": 45281,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "feat/rfc/poc: Agnostic GPU",
-    "updated_at": "2026-04-09T14:34:20Z"
+    "title": "Fix resize failure caused by zero-sized masks in PP-DocLayoutV3",
+    "updated_at": "2026-04-09T13:06:44Z"
   },
   {
-    "additions": 306,
-    "author": "pdufour",
+    "additions": 2509,
+    "author": "marvinzh",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This fixes an issues with the model that made it incompatible with exporting to onnx. Specifically the following has been changed: 1. if condition ``` if input_len < context_len: ``` Will give this error when you tr\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? add Qianfan-OCR model definition - `QianfanOCRForConditionalGeneration` - image-text to text model definition - `QianfanOCRModel` - backbone of image-text to text model without lm heads - `QianfanOCRProcessor` - tex\u2026",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45233",
-    "created_at": "2026-04-04T00:15:04Z",
-    "deletions": 98,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45280",
+    "created_at": "2026-04-07T06:49:34Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45233/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45233",
+    "files_url": "https://github.com/huggingface/transformers/pull/45280/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45280",
     "labels": [],
     "merged": false,
-    "number": 45233,
-    "review_comments_count": 4,
+    "number": 45280,
+    "review_comments_count": 55,
     "state": "open",
-    "title": "feat: make timesfm2_5 onnx export compatible",
-    "updated_at": "2026-04-10T23:37:52Z"
+    "title": "add Qianfan-OCR model definition",
+    "updated_at": "2026-04-14T15:10:01Z"
   },
   {
-    "additions": 439,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "adds docs for static model rules so model contributors are aware of expectations > I wonder if it wouldn't make sense to auto generate that portion of the doc automatically added from @tarekziade feedback: - reformats `format_rule_details(\u2026",
-    "changed_files": 8,
+    "additions": 43,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45232",
-    "created_at": "2026-04-03T22:41:22Z",
-    "deletions": 146,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45279",
+    "created_at": "2026-04-07T06:40:35Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45232/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45232",
+    "files_url": "https://github.com/huggingface/transformers/pull/45279/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45279",
     "labels": [],
-    "merged": true,
-    "number": 45232,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "[docs] static model rules",
-    "updated_at": "2026-04-08T15:06:17Z"
+    "merged": false,
+    "number": 45279,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "add expert parallelism for gemma-4-26B-A4B-it",
+    "updated_at": "2026-04-14T13:32:10Z"
   },
   {
-    "additions": 20,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? All of this are trivial ... (maybe except \"evolla\")",
-    "changed_files": 10,
+    "additions": 4,
+    "author": "kamalrajkannan78",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? - Gemma3ForConditionalGeneration.forward & Gemma3ForSequenceClassification.forward calls self.model() without return_dict=True, so @can_return_tuple silently converts the output to a plain tuple, causing outputs.pas\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45228",
-    "created_at": "2026-04-03T18:17:29Z",
-    "deletions": 12,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45277",
+    "created_at": "2026-04-07T05:24:43Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45228/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45228",
+    "files_url": "https://github.com/huggingface/transformers/pull/45277/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45277",
     "labels": [],
     "merged": true,
-    "number": 45228,
+    "number": 45277,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "More fix for tiny model creation",
-    "updated_at": "2026-04-03T19:25:51Z"
+    "title": "Fix AttributeError in Gemma3ForConditionalGeneration and Gemma3ForSequenceClassification when config.return_dict=False",
+    "updated_at": "2026-04-10T10:37:16Z"
   },
   {
-    "additions": 3,
-    "author": "akhilc08",
+    "additions": 9,
+    "author": "avarga1",
     "author_association": "NONE",
-    "body_excerpt": "## Summary - Remove the unused `PILImageResampling` runtime import from `video_processing_utils.py` which causes an `ImportError` when Pillow is not installed - Also remove the now-unused `is_vision_available` import that only guarded the\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Problem `AutoConfig.from_pretrained(\"baidu/ERNIE-4.5-VL-28B-A3B-Paddle\", trust_remote_code=True)` raises errors that prevent the model from loading at all. Three separate bugs compound each other: ### Bug 1 \u2014 `model_type` mismatch (KeyE\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45227",
-    "created_at": "2026-04-03T17:44:25Z",
-    "deletions": 7,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45275",
+    "created_at": "2026-04-07T02:22:26Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45227/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45227",
+    "files_url": "https://github.com/huggingface/transformers/pull/45275/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45275",
     "labels": [],
     "merged": false,
-    "number": 45227,
-    "review_comments_count": 0,
+    "number": 45275,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "fix: remove nonexistent PILImageResampling import from video_processing_utils",
-    "updated_at": "2026-04-05T17:04:49Z"
+    "title": "fix(ernie4_5_vl_moe): resolve three config loading failures for ERNIE-4.5-VL MoE models",
+    "updated_at": "2026-04-09T16:57:22Z"
   },
   {
-    "additions": 3,
-    "author": "akhilc08",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Fixes an `IndexError: string index out of range` crash in `_split_tokens_on_unicode()` when the decoded token stream ends with a dangling Unicode replacement character (U+FFFD) - Adds a bounds check so that when `unicode_offse\u2026",
-    "changed_files": 1,
+    "additions": 215,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? 1. Fix: CUDA graph reuse for FA2 continuous batching was wrongly keyed causing quality collapse for specific configuration CUDA graph reuse used the wrong key: replay reuse depended on padded tensor sizes, but FA va\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45226",
-    "created_at": "2026-04-03T17:43:21Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45274",
+    "created_at": "2026-04-07T01:43:12Z",
+    "deletions": 36,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45226/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45226",
+    "files_url": "https://github.com/huggingface/transformers/pull/45274/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45274",
     "labels": [],
     "merged": false,
-    "number": 45226,
+    "number": 45274,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: handle trailing replacement character in Whisper word timestamp decoding",
-    "updated_at": "2026-04-05T17:04:49Z"
+    "state": "open",
+    "title": "Fix CB Accuracy Regression under FA2",
+    "updated_at": "2026-04-09T03:47:47Z"
   },
   {
-    "additions": 6,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The `dev` extra now indirectly pulls hf-doc-builder so the install step failed. We also need to update to current main for the latest features",
+    "additions": 2,
+    "author": "excepshenal",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Liger kernel unnecessarily materializes logits in VRAM during eval with `prediction_loss_only=True`, causing OOM. We explicitly tell Liger to `skip_logits`. <!-- Congratulations! You've made it this far! You're not\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45225",
-    "created_at": "2026-04-03T16:46:54Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45273",
+    "created_at": "2026-04-06T21:11:21Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45225/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45225",
+    "files_url": "https://github.com/huggingface/transformers/pull/45273/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45273",
     "labels": [],
-    "merged": true,
-    "number": 45225,
+    "merged": false,
+    "number": 45273,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: hf-doc-builder insallation was failing",
-    "updated_at": "2026-04-03T17:02:09Z"
+    "state": "open",
+    "title": "fix: liger unnecessarily materializes logits in VRAM during eval, causing OOM",
+    "updated_at": "2026-04-09T15:32:29Z"
   },
   {
-    "additions": 2,
-    "author": "ydshieh",
+    "additions": 6,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix for tiny model",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45224",
-    "created_at": "2026-04-03T16:39:23Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45224/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45224",
-    "labels": [],
-    "merged": true,
-    "number": 45224,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "remove unnecessary entries in some auto model mappings",
-    "updated_at": "2026-04-03T17:26:24Z"
-  },
-  {
-    "additions": 17,
-    "author": "Talhax55z",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #31356 ## What does this PR do? The `postprocess` method in `ObjectDetectionPipeline` was hardcoding `raw_annotations[0]`, which caused batch inference to only return results for the first image, ignoring all others. This PR replaces\u2026",
+    "body_excerpt": "Cc @ydshieh",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45223",
-    "created_at": "2026-04-03T16:36:25Z",
-    "deletions": 15,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45272",
+    "created_at": "2026-04-06T20:39:29Z",
+    "deletions": 20,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45223/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45223",
+    "files_url": "https://github.com/huggingface/transformers/pull/45272/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45272",
     "labels": [],
     "merged": false,
-    "number": 45223,
+    "number": 45272,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix: ObjectDetectionPipeline batch inference only returns first image results",
-    "updated_at": "2026-04-11T08:38:42Z"
+    "title": "Fix redundant logic in video processing SmolVLM",
+    "updated_at": "2026-04-06T20:49:13Z"
   },
   {
-    "additions": 12,
-    "author": "jashshah999",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary When using Gemma 3 or Gemma 4 for text-only supervised fine-tuning (no images), the forward pass raises a `ValueError` because `token_type_ids` / `mm_token_type_ids` is not provided. This happens because `AutoTokenizer` does not\u2026",
-    "changed_files": 4,
+    "additions": 90,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "adds a separate vlm contribution doc for more visibility instead of being hidden in the Contribute to Transformers doc, and integration tests are covered in #45152",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45222",
-    "created_at": "2026-04-03T16:27:31Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45271",
+    "created_at": "2026-04-06T18:56:42Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45222/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45222",
+    "files_url": "https://github.com/huggingface/transformers/pull/45271/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45271",
     "labels": [],
     "merged": false,
-    "number": 45222,
-    "review_comments_count": 0,
+    "number": 45271,
+    "review_comments_count": 5,
     "state": "open",
-    "title": "fix(gemma3, gemma4): default token_type_ids to zeros for text-only training",
-    "updated_at": "2026-04-07T11:47:12Z"
+    "title": "[docs] vlm addition",
+    "updated_at": "2026-04-07T18:22:18Z"
   },
   {
-    "additions": 6,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title",
-    "changed_files": 1,
+    "additions": 162,
+    "author": "madhav1k",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Introduce logging of individual loss components when models return a dict of losses. - Add TrainingArguments.logging_loss_components flag to enable/disable this behavior. - Track per-component running sums with _tr_loss_components and aggr\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45221",
-    "created_at": "2026-04-03T15:49:24Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45221/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45221",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45270",
+    "created_at": "2026-04-06T18:38:51Z",
+    "deletions": 7,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45270/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45270",
     "labels": [],
     "merged": false,
-    "number": 45221,
+    "number": 45270,
     "review_comments_count": 0,
     "state": "open",
-    "title": "user friendly error when loading audio from video",
-    "updated_at": "2026-04-09T14:16:24Z"
+    "title": "[Trainer] Support multi-loss component logging",
+    "updated_at": "2026-04-13T08:30:40Z"
   },
   {
-    "additions": 346,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds transformers serve compatibility to multimodal models like qwen omni or gemma 4. We add support for audio with chat completion and response though `input_audio` -> the client need to base64-encode the a\u2026",
-    "changed_files": 5,
+    "additions": 3,
+    "author": "ryota-komatsu",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix minor typos in `src/transformers/utils/output_capturing.py`: - `install_output_capuring_hook` \u2192 `install_output_capturing_hook` (lines 98, 147) - `Tis` \u2192 `This` (line 152) ## Before submitting - [x] This PR fixe\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45220",
-    "created_at": "2026-04-03T14:16:33Z",
-    "deletions": 37,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45269",
+    "created_at": "2026-04-06T18:01:42Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45220/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45220",
+    "files_url": "https://github.com/huggingface/transformers/pull/45269/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45269",
     "labels": [],
     "merged": false,
-    "number": 45220,
-    "review_comments_count": 5,
+    "number": 45269,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Multimodal serve support ",
-    "updated_at": "2026-04-03T19:57:08Z"
+    "title": "Fix typos in src/transformers/utils/output_capturing.py",
+    "updated_at": "2026-04-06T20:49:06Z"
   },
   {
     "additions": 3,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? google/gemma-4-26B-A4B-it tp 2, memory is 46G per rank wo the change, drop to about 25G w per rank with the change - text models: @ArthurZucker @Cyrilvallez",
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix `Qwen2IntegrationTest`: - `test_speculative_generation`: - `0c89522f`: #43794 changed seed, so the actual output changed, but the expected output is not updated (cc @tarekziade more attention next time \ud83d\ude04 ) - `af\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45219",
-    "created_at": "2026-04-03T13:59:02Z",
-    "deletions": 0,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45268",
+    "created_at": "2026-04-06T18:01:38Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45219/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45219",
+    "files_url": "https://github.com/huggingface/transformers/pull/45268/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45268",
     "labels": [],
     "merged": true,
-    "number": 45219,
-    "review_comments_count": 0,
+    "number": 45268,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Add MoE to Gemma4 TP plan",
-    "updated_at": "2026-04-08T14:09:40Z"
+    "title": "Fix `Qwen2IntegrationTest`",
+    "updated_at": "2026-04-08T07:56:35Z"
   },
   {
-    "additions": 4057,
-    "author": "LysandreJik",
-    "author_association": "MEMBER",
-    "body_excerpt": "This PR offers a new, Agentic surface for transformers. It tries to apply what is done elsewhere with CLIs to `transformers`, leveraging many current use-cases of `transformers` and exposing them as CLI endpoints. I recommend reading this\u2026",
-    "changed_files": 15,
+    "additions": 108,
+    "author": "KetanP1618",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds a missing docstring to the `FFN.forward` method in DistilBERT. The `FFN.forward` method in `modeling_distilbert.py` had no documentation at all. This PR adds proper Args and Returns sections following the exis\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45218",
-    "created_at": "2026-04-03T13:31:08Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45218/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45218",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45267",
+    "created_at": "2026-04-06T17:59:13Z",
+    "deletions": 39,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45267/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45267",
     "labels": [],
     "merged": false,
-    "number": 45218,
+    "number": 45267,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Proposal: Agent-first CLI",
-    "updated_at": "2026-04-06T12:42:44Z"
+    "title": "Add docstring to FFN.forward in DistilBERT",
+    "updated_at": "2026-04-08T13:43:57Z"
   },
   {
-    "additions": 5,
-    "author": "ENg-122",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Remove unnecessary masked_fill_(mask, 0) call in torch_chunk_gated_delta_rule. The decay_mask computed earlier already encodes the causal/lower-triangular structure (upper-triangle values are zero), so masking the a\u2026",
-    "changed_files": 5,
+    "additions": 125,
+    "author": "KetanP1618",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds missing docstrings to two undocumented `forward` methods in the ALBERT model: - `AlbertMLMHead.forward` - Added Args and Returns sections - `AlbertSOPHead.forward` - Added Args and Returns sections Both method\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45215",
-    "created_at": "2026-04-03T09:08:28Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45266",
+    "created_at": "2026-04-06T17:41:14Z",
+    "deletions": 41,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45215/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45215",
+    "files_url": "https://github.com/huggingface/transformers/pull/45266/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45266",
+    "labels": [],
+    "merged": false,
+    "number": 45266,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add docstrings to AlbertMLMHead and AlbertSOPHead forward methods",
+    "updated_at": "2026-04-08T13:43:34Z"
+  },
+  {
+    "additions": 1,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? **Fix tf32 issue: set `torch.backends.cudnn.conv.fp32_precision` explicitly.** (#45248) breaks when running on torch 2.8 or older ...",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45263",
+    "created_at": "2026-04-06T10:06:40Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45263/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45263",
     "labels": [],
     "merged": true,
-    "number": 45215,
+    "number": 45263,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Qwen3_5]Remove unnecessary masked_fill_ in torch_chunk_gated_delta_rule attention computation: \"attn = (q_i @ k_i.transpose(-1, -2) * decay_mask[:, :, i]).masked_fill_(mask, 0)\"",
-    "updated_at": "2026-04-09T14:12:51Z"
+    "title": "Add `hasattr(torch.backends.cudnn, \"conv\")` to `conftest.py`",
+    "updated_at": "2026-04-06T19:46:18Z"
   },
   {
-    "additions": 45,
-    "author": "kaixuanliu",
+    "additions": 1,
+    "author": "lowzhao",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR fixes failed test case: `tests/models/cohere_asr/test_modeling_cohere_asr.py::CohereAsrModelTest::test_model_parallel_beam_search`, and add some adjustment to make the test cases pass for Intel XPU device. @ydshieh pls help review,\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Fix docstring spelling mistake TokenizersBackend.convert_to_native_format. ```python @classmethod def convert_to_native_format(cls, trust_remote_code=False, **kwargs): \"\"\"s <---- additional s ``` Likely caused by mi\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45214",
-    "created_at": "2026-04-03T08:32:34Z",
-    "deletions": 14,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45262",
+    "created_at": "2026-04-06T08:41:40Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45214/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45214",
+    "files_url": "https://github.com/huggingface/transformers/pull/45262/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45262",
     "labels": [],
     "merged": true,
-    "number": 45214,
-    "review_comments_count": 2,
+    "number": 45262,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "cohere_asr: fix bug for model_parallel_beam_search test case",
-    "updated_at": "2026-04-13T02:40:29Z"
+    "title": "doc: fix TokenizersBackend.convert_to_native_format docstring",
+    "updated_at": "2026-04-06T16:32:44Z"
   },
   {
-    "additions": 6052,
-    "author": "tarekziade",
+    "additions": 22,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "comparison https://github.com/huggingface/transformers/blob/937d61b9fa00001da1a0680ecf8061b5990fbcd7/sarvam_moe_comparison.md # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once mer\u2026",
-    "changed_files": 26,
+    "body_excerpt": "# What this PR does It's working, see https://github.com/huggingface/transformers/actions/runs/24025915210 (the failing job in this PR is because the workflow needs to be on `main` to be effective).",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45213",
-    "created_at": "2026-04-03T08:25:49Z",
-    "deletions": 209,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45213/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45213",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45261",
+    "created_at": "2026-04-06T07:52:29Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45261/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45261",
     "labels": [],
-    "merged": false,
-    "number": 45213,
+    "merged": true,
+    "number": 45261,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "DO NOT MERGE - model creation skill",
-    "updated_at": "2026-04-03T12:26:09Z"
+    "state": "closed",
+    "title": "empty",
+    "updated_at": "2026-04-06T09:24:59Z"
   },
   {
-    "additions": 29,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh pls help review, thx!",
+    "additions": 18,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary PR #43514 refactored `_preprocess` to pass `resample=resample` to `resize`, but the `resize` method in `SmolVLMVideoProcessor` still had `interpolation` as its parameter name. The `resample` kwarg was silently swallowed by `**kw\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45212",
-    "created_at": "2026-04-03T07:44:35Z",
-    "deletions": 6,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45258",
+    "created_at": "2026-04-06T05:09:09Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45212/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45212",
+    "files_url": "https://github.com/huggingface/transformers/pull/45258/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45258",
     "labels": [],
     "merged": true,
-    "number": 45212,
+    "number": 45258,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Fix `SmolVLM` video processor `resize` using wrong interpolation after backend refactor",
+    "updated_at": "2026-04-06T20:40:22Z"
+  },
+  {
+    "additions": 269,
+    "author": "lucianommartins",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# [Gemma4] Fix chat template and stop tokens for OpenAI tool calling compatibility ## What does this PR do? Rewrites the `_patch_template_for_openai_tool_role()` function in `convert_gemma4_weights.py` to fully support OpenAI Chat Completi\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45257",
+    "created_at": "2026-04-05T22:07:53Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45257/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45257",
+    "labels": [],
+    "merged": false,
+    "number": 45257,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "musicflamingo: add test support for Intel XPU device",
-    "updated_at": "2026-04-10T10:41:20Z"
+    "title": "[Gemma4] Fix chat template and stop tokens for OpenAI tool calling compatibility",
+    "updated_at": "2026-04-10T13:28:04Z"
   },
   {
-    "additions": 4,
-    "author": "matdou",
+    "additions": 39,
+    "author": "zozo123",
     "author_association": "NONE",
-    "body_excerpt": "Fixes #45208 # What does this PR do? This PR corrects an incorrect return type in `Qwen3MoeSparseMoeBlock.forward`. The method was annotated as returning `tuple[torch.Tensor, torch.Tensor]`, while the implementation returns a `torch.Tensor\u2026",
-    "changed_files": 4,
+    "body_excerpt": "## Summary When saving a Qwen3.5 VL model via `save_pretrained`, the `revert_weight_conversion` for `qwen3_5_text` replaces a leading `model.` segment. This wrongly matches keys that already start with `model.language_model.` on composite\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45211",
-    "created_at": "2026-04-03T07:44:32Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45256",
+    "created_at": "2026-04-05T19:00:26Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45211/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45211",
+    "files_url": "https://github.com/huggingface/transformers/pull/45256/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45256",
     "labels": [],
     "merged": false,
-    "number": 45211,
+    "number": 45256,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Qwen3MoE] Fix wrong return type annotation in Qwen3MoeSparseMoeBlock.forward",
-    "updated_at": "2026-04-08T19:20:49Z"
+    "title": "fix: skip qwen3_5_text checkpoint remap for nested VL language_model",
+    "updated_at": "2026-04-06T18:50:06Z"
   },
   {
-    "additions": 11,
-    "author": "ArthurZucker",
+    "additions": 0,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Commits that got in the release branch to allow pushing",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? - `tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_model_450m_logits`: - failed due to 6217adc6c8f0be7b5374e6a46129ad2214e4c6ed - `tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForCondi\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45210",
-    "created_at": "2026-04-03T05:42:21Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45254",
+    "created_at": "2026-04-05T18:20:02Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45210/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45210",
+    "files_url": "https://github.com/huggingface/transformers/pull/45254/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45254",
     "labels": [],
-    "merged": true,
-    "number": 45210,
+    "merged": false,
+    "number": 45254,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix pypi release",
-    "updated_at": "2026-04-03T06:40:39Z"
+    "state": "open",
+    "title": "Fix more integration tests for important models",
+    "updated_at": "2026-04-06T05:23:46Z"
   },
   {
-    "additions": 3,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh pls help review, thx!",
-    "changed_files": 1,
+    "additions": 33,
+    "author": "Charly21r",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a bug where `use_cache=False` produces garbage logits in Gemma 4 models due to broken KV sharing between layers. Fixes #45242 ## Root cause of the issue Gemma 4 introduces two architectural features not presen\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45209",
-    "created_at": "2026-04-03T05:40:36Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45253",
+    "created_at": "2026-04-05T18:12:08Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45209/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45209",
+    "files_url": "https://github.com/huggingface/transformers/pull/45253/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45253",
     "labels": [],
-    "merged": true,
-    "number": 45209,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 45253,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "nomic_bert: make the test suitable for general device.",
-    "updated_at": "2026-04-13T02:40:26Z"
+    "title": "Fix Gemma4 `use_cache=False` producing bad logits",
+    "updated_at": "2026-04-09T08:17:08Z"
   },
   {
-    "additions": 41,
-    "author": "w4nderlust",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #45206 ## What does this PR do? Adds documentation for the Gemma4 Per-Layer Embeddings (PLE) system, which is currently pretty hard to reverse-engineer from the code alone. I ran into this while implementing Gemma4 inference from scr\u2026",
-    "changed_files": 3,
+    "additions": 4,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? #43166 used `torch.set_float32_matmul_precision(\"high\")` which causes (likely) TF32 being used > \u201chigh\u201d, float32 matrix multiplications either use the TensorFloat32 datatype (10 mantissa bits explicitly stored) or t\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45207",
-    "created_at": "2026-04-03T05:15:47Z",
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45252",
+    "created_at": "2026-04-05T16:51:29Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45207/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45207",
+    "files_url": "https://github.com/huggingface/transformers/pull/45252/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45252",
     "labels": [],
+    "merged": true,
+    "number": 45252,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix unexpected TF32 being enabled in testing",
+    "updated_at": "2026-04-07T10:14:50Z"
+  },
+  {
+    "additions": 91,
+    "author": "balgaly",
+    "author_association": "NONE",
+    "body_excerpt": "## Problem `torch.multinomial` rejects last dimensions >= 2**24. Beam search with `do_sample=True` builds a flat distribution of size `num_beams * vocab_size`, which can exceed that limit (e.g. large beams + ~164k vocab), crashing during g\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45251",
+    "created_at": "2026-04-05T15:38:58Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45251/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45251",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 45207,
-    "review_comments_count": 6,
-    "state": "open",
-    "title": "[Gemma4] Add docstrings for Per-Layer Embeddings (PLE) pipeline",
-    "updated_at": "2026-04-11T08:18:17Z"
+    "number": 45251,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(generation): beam sample when num_beams * vocab_size exceeds multinomial limit",
+    "updated_at": "2026-04-08T14:50:40Z"
   },
   {
-    "additions": 112,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh pls help review, thx!",
-    "changed_files": 3,
+    "additions": 16,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? PR #42428 change the way to enable / disable torch's TF32 using torch new API. It turns out set > torch.backends.fp32_precision = False would still have > torch.backends.cudnn.conv.fp32_precision = \"tf32\" > torch.ba\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45204",
-    "created_at": "2026-04-03T02:32:39Z",
-    "deletions": 10,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45248",
+    "created_at": "2026-04-05T07:51:44Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45204/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45204",
+    "files_url": "https://github.com/huggingface/transformers/pull/45248/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45248",
     "labels": [],
     "merged": true,
-    "number": 45204,
+    "number": 45248,
     "review_comments_count": 2,
     "state": "closed",
-    "title": "fix bug for videomt model device mismatch",
-    "updated_at": "2026-04-13T02:40:27Z"
+    "title": "Fix tf32 issue: set `torch.backends.cudnn.conv.fp32_precision` explicitly.",
+    "updated_at": "2026-04-10T21:48:56Z"
   },
   {
-    "additions": 2,
-    "author": "Qubitium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Disable FlashAttention support for Gemm4 which FA cannot suport due to global.head-dim=512. I am very confused at the current code/test for Gemma4. I ran real inference using transformer `main` and `fa` throws head-\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "gagandhakrey",
+    "author_association": "NONE",
+    "body_excerpt": "\u2026 validation # What does this PR do? Problem The invert_attention_mask function in src/transformers/modeling_utils.py crashed with an UnboundLocalError when given an encoder_attention_mask shape that wasn't exactly 2D or 3D. Because it onl\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45202",
-    "created_at": "2026-04-02T23:37:22Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45247",
+    "created_at": "2026-04-05T03:45:23Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45202/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45202",
+    "files_url": "https://github.com/huggingface/transformers/pull/45247/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45247",
     "labels": [],
     "merged": false,
-    "number": 45202,
-    "review_comments_count": 4,
-    "state": "open",
-    "title": "Fix gemma4 has flash-attention incompatbile head-dim=512",
-    "updated_at": "2026-04-12T03:39:09Z"
+    "number": 45247,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix UnboundLocalError in invert_attention_mask by adding proper shape\u2026",
+    "updated_at": "2026-04-09T14:11:43Z"
   },
   {
-    "additions": 9,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following **Wav2Vec2PhonemeCTC** use cases were identified and fixed in this PR: \u2192 [05c0e1d (\"rm slow tokenizers\")](https://github.com/huggingface/transformers/pull/40936) added [self.backend = kwargs.pop(\"bac\u2026",
-    "changed_files": 2,
+    "additions": 0,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Let's CI go great!!!!",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45199",
-    "created_at": "2026-04-02T20:03:22Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45244",
+    "created_at": "2026-04-04T18:52:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45199/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45199",
+    "files_url": "https://github.com/huggingface/transformers/pull/45244/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45244",
     "labels": [],
     "merged": false,
-    "number": 45199,
-    "review_comments_count": 1,
+    "number": 45244,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "fix(models): Resolve regressions in Wav2Vec2PhonemeCTCTokenizer (wav2vec2-lv-60-espeak-cv-ft)",
-    "updated_at": "2026-04-13T04:46:04Z"
+    "title": "Let's CI go great",
+    "updated_at": "2026-04-04T19:59:28Z"
   },
   {
-    "additions": 104,
-    "author": "douglas-reid",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes issues with the Gemma 4 model docs. Mainly, this is updating examples to point at the actual models, with FC and Audio examples added. - [ X ] I confirm that this is not a pure code agent PR. ## Before submitting - [ X ] This PR fixe\u2026",
+    "additions": 2,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Use torch 2.11 for our (daily) CI since it's released for 2 weeks already. For CircleCI, we need to fix something regarding `torchvision.io.read_video`. For daily CI, torch 2.11 doesn't cause issues (for those `torc\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45197",
-    "created_at": "2026-04-02T19:05:35Z",
-    "deletions": 42,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45243",
+    "created_at": "2026-04-04T18:09:27Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45197/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45197",
+    "files_url": "https://github.com/huggingface/transformers/pull/45243/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45243",
     "labels": [],
     "merged": true,
-    "number": 45197,
+    "number": 45243,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(docs): correct gemma4 docs and examples",
-    "updated_at": "2026-04-02T22:23:16Z"
+    "title": "Nvidia CI with `torch 2.11`",
+    "updated_at": "2026-04-04T18:48:45Z"
   },
   {
-    "additions": 1,
-    "author": "stevhliu",
+    "additions": 523,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "fixes `<hfoptions>` tag",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? After the series of fixes in other previous PRs, we can now update the tiny model creation script. This update makes the script running without any failure, just 10 warnings. There are many # TODO, some of them may\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45196",
-    "created_at": "2026-04-02T18:02:44Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45241",
+    "created_at": "2026-04-04T12:30:35Z",
+    "deletions": 164,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45196/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45196",
+    "files_url": "https://github.com/huggingface/transformers/pull/45241/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45241",
     "labels": [],
     "merged": true,
-    "number": 45196,
+    "number": 45241,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] formatting",
-    "updated_at": "2026-04-03T09:37:52Z"
+    "title": "Update tiny model creation script",
+    "updated_at": "2026-04-04T17:19:34Z"
   },
   {
-    "additions": 90,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What this PR does Adds a new `load_image_as_tensor` utility leveraging torchvision's `decode_image` to `image_utils.py` and overrides `fetch_images` in `TorchvisionBackend` to use it. Previously, all image loading went through PIL regard\u2026",
-    "changed_files": 5,
+    "additions": 3,
+    "author": "shhKnight30",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Type checking for `PreTrainedConfig` subclasses broke in v5.4.0 and this fixes it. The culprit is `wrap_init_to_accept_kwargs` \u2014 it swaps out the dataclass-generated `__init__` with a `(**kwargs: Any)` wrapper at r\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45195",
-    "created_at": "2026-04-02T17:58:51Z",
-    "deletions": 20,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45240",
+    "created_at": "2026-04-04T10:29:57Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45195/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45195",
+    "files_url": "https://github.com/huggingface/transformers/pull/45240/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45240",
     "labels": [],
     "merged": true,
-    "number": 45195,
-    "review_comments_count": 5,
+    "number": 45240,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": " Use torchvision `decode_image` to load images in the torchvision backend",
-    "updated_at": "2026-04-09T17:08:04Z"
+    "title": "fix: restore mypy type checking for PreTrainedConfig subclasses (#45071)",
+    "updated_at": "2026-04-10T11:10:27Z"
   },
   {
-    "additions": 222,
-    "author": "zucchini-nlp",
+    "additions": 28,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes: - Replace `PretrainedConfig` with `PreTrainedConfig` - Don't import from other models inside `configuration_model.py`, instead resolve via modular - Text/vision sub-configs are documented (only kosmos was mis\u2026",
-    "changed_files": 23,
+    "body_excerpt": "# What does this PR do? We have introduced `CausalLMModelTest` for some time, but haven't update `get_test_info.py` accordingly, which causes some issues, in particularly for tiny model creation, regarding the part of the attribute `all_mo\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45194",
-    "created_at": "2026-04-02T16:39:06Z",
-    "deletions": 40,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45238",
+    "created_at": "2026-04-04T07:25:15Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45194/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45194",
+    "files_url": "https://github.com/huggingface/transformers/pull/45238/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45238",
     "labels": [],
-    "merged": false,
-    "number": 45194,
-    "review_comments_count": 1,
+    "merged": true,
+    "number": 45238,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Configuration insoncistencies",
-    "updated_at": "2026-04-10T11:39:18Z"
+    "title": "Update `get_test_info.py` (related to tiny model creation)",
+    "updated_at": "2026-04-04T07:40:21Z"
   },
   {
-    "additions": 15,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45070 Though I am convinced that configs should not be pyndatic BaseClass, just because we already wrap all subclasses as dataclass. You never know what happe\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "KoichiYasuoka",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes the bug in `resize_token_embeddings` (may occur python3.10 or after). Quick reproduce: ``` from transformers import AutoModelForMaskedLM mdl = AutoModelForMaskedLM.from_pretrained(\"bert-base-uncased\") f = mdl.\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45193",
-    "created_at": "2026-04-02T15:36:56Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45193/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45193",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45236",
+    "created_at": "2026-04-04T07:05:53Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45236/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45236",
     "labels": [],
     "merged": false,
-    "number": 45193,
+    "number": 45236,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Config can apply pyndatic validation without torch-dependence",
-    "updated_at": "2026-04-02T17:00:43Z"
+    "state": "closed",
+    "title": "resize_token_embeddings does not resize lm_head",
+    "updated_at": "2026-04-07T00:55:39Z"
   },
   {
-    "additions": 9896,
-    "author": "RyanMullins",
+    "additions": 102,
+    "author": "kallewoof",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "--------- # What does this PR do? model previously unable to use tools ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenec\u2026",
-    "changed_files": 41,
+    "body_excerpt": "# What does this PR do? This PR adds a tiny \"agnostic.gpu\" utility that is meant to allow easy replacing of unnecessarily hard-coded vendor-specific code. The code does not use `torch.accelerator` as it is still considered experimental, bu\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45192",
-    "created_at": "2026-04-02T14:35:18Z",
-    "deletions": 79,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45192/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45192",
-    "labels": [
-      "New model"
-    ],
-    "merged": true,
-    "number": 45192,
-    "review_comments_count": 2,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45235",
+    "created_at": "2026-04-04T06:08:22Z",
+    "deletions": 13,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45235/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45235",
+    "labels": [],
+    "merged": false,
+    "number": 45235,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "casually dropping the most capable open weights on the planet",
-    "updated_at": "2026-04-03T12:28:06Z"
+    "title": "feat/rfc/poc: Agnostic GPU",
+    "updated_at": "2026-04-09T14:34:20Z"
   },
   {
-    "additions": 13,
-    "author": "saslifat-gif",
-    "author_association": "NONE",
-    "body_excerpt": "The Qwen2 tokenizer test file had no custom test methods \u2014 only integration constants inherited from TokenizerTesterMixin. This PR adds a test documenting two untested edge cases in decode(): **Before (no test, behavior undocumented):** ``\u2026",
-    "changed_files": 1,
+    "additions": 306,
+    "author": "pdufour",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This fixes an issues with the model that made it incompatible with exporting to onnx. Specifically the following has been changed: 1. if condition ``` if input_len < context_len: ``` Will give this error when you tr\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45191",
-    "created_at": "2026-04-02T14:08:15Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45233",
+    "created_at": "2026-04-04T00:15:04Z",
+    "deletions": 98,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45191/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45191",
+    "files_url": "https://github.com/huggingface/transformers/pull/45233/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45233",
     "labels": [],
     "merged": false,
-    "number": 45191,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add edge case tests for out-of-range token id decoding in Qwen2 tokenizer",
-    "updated_at": "2026-04-08T12:18:56Z"
+    "number": 45233,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "feat: make timesfm2_5 onnx export compatible",
+    "updated_at": "2026-04-10T23:37:52Z"
   },
   {
-    "additions": 92,
-    "author": "SunMarc",
+    "additions": 439,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This patch extends ty check to src/transformers/cli. Supersedes https://github.com/huggingface/transformers/pull/44566. I've added some of the changes in this PR",
-    "changed_files": 10,
+    "body_excerpt": "adds docs for static model rules so model contributors are aware of expectations > I wonder if it wouldn't make sense to auto generate that portion of the doc automatically added from @tarekziade feedback: - reformats `format_rule_details(\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45190",
-    "created_at": "2026-04-02T13:54:23Z",
-    "deletions": 34,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45232",
+    "created_at": "2026-04-03T22:41:22Z",
+    "deletions": 146,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45190/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45190",
+    "files_url": "https://github.com/huggingface/transformers/pull/45232/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45232",
     "labels": [],
-    "merged": false,
-    "number": 45190,
-    "review_comments_count": 7,
-    "state": "open",
-    "title": "Fix ty for transformers cli",
-    "updated_at": "2026-04-09T16:54:36Z"
+    "merged": true,
+    "number": 45232,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[docs] static model rules",
+    "updated_at": "2026-04-08T15:06:17Z"
   },
   {
-    "additions": 409,
+    "additions": 20,
     "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "## Summary - Add two optional, backward-compatible inputs (`test_path_prefix`, `test_file_suffix`) to `model_jobs.yml` and `self-scheduled.yml` \u2014 defaults preserve all existing behavior - Extend the `set-matrix` step in `self-scheduled.yml\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? All of this are trivial ... (maybe except \"evolla\")",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45189",
-    "created_at": "2026-04-02T13:43:29Z",
-    "deletions": 185,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45228",
+    "created_at": "2026-04-03T18:17:29Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45189/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45189",
+    "files_url": "https://github.com/huggingface/transformers/pull/45228/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45228",
     "labels": [],
-    "merged": false,
-    "number": 45189,
+    "merged": true,
+    "number": 45228,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add doc test CI workflow reusing existing model job infrastructure",
-    "updated_at": "2026-04-03T07:30:17Z"
+    "state": "closed",
+    "title": "More fix for tiny model creation",
+    "updated_at": "2026-04-03T19:25:51Z"
   },
   {
-    "additions": 5,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do ? This PR fixes the `test_register_result_handler`. Not sure how it passed in the past when i added it but since CB returns `generated_tokens` from the same list to avoid copy, len(results[i].generated_tokens) for i\u2026",
-    "changed_files": 1,
+    "additions": 3,
+    "author": "akhilc08",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Remove the unused `PILImageResampling` runtime import from `video_processing_utils.py` which causes an `ImportError` when Pillow is not installed - Also remove the now-unused `is_vision_available` import that only guarded the\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45188",
-    "created_at": "2026-04-02T13:15:57Z",
-    "deletions": 8,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45227",
+    "created_at": "2026-04-03T17:44:25Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45188/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45188",
+    "files_url": "https://github.com/huggingface/transformers/pull/45227/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45227",
     "labels": [],
-    "merged": true,
-    "number": 45188,
+    "merged": false,
+    "number": 45227,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix `test_register_result_handler`",
-    "updated_at": "2026-04-03T09:37:31Z"
+    "title": "fix: remove nonexistent PILImageResampling import from video_processing_utils",
+    "updated_at": "2026-04-05T17:04:49Z"
   },
   {
     "additions": 3,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Close file handler.",
+    "author": "akhilc08",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Fixes an `IndexError: string index out of range` crash in `_split_tokens_on_unicode()` when the decoded token stream ends with a dangling Unicode replacement character (U+FFFD) - Adds a bounds check so that when `unicode_offse\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45187",
-    "created_at": "2026-04-02T13:10:31Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45226",
+    "created_at": "2026-04-03T17:43:21Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45187/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45187",
+    "files_url": "https://github.com/huggingface/transformers/pull/45226/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45226",
     "labels": [],
-    "merged": true,
-    "number": 45187,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 45226,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Close file handler",
-    "updated_at": "2026-04-10T11:31:40Z"
+    "title": "fix: handle trailing replacement character in Whisper word timestamp decoding",
+    "updated_at": "2026-04-05T17:04:49Z"
   },
   {
-    "additions": 7248,
-    "author": "zucchini-nlp",
+    "additions": 6,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Same as https://github.com/huggingface/transformers/pull/40962 but cleans up the code to match transformers API. Couldn't test due to errors, the integration test is failing atm. Still need to clean the testing file\u2026",
-    "changed_files": 21,
+    "body_excerpt": "# What does this PR do? The `dev` extra now indirectly pulls hf-doc-builder so the install step failed. We also need to update to current main for the latest features",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45186",
-    "created_at": "2026-04-02T12:29:46Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45225",
+    "created_at": "2026-04-03T16:46:54Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45186/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45186",
+    "files_url": "https://github.com/huggingface/transformers/pull/45225/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45225",
     "labels": [],
-    "merged": false,
-    "number": 45186,
+    "merged": true,
+    "number": 45225,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add new model: Isaac ",
-    "updated_at": "2026-04-02T12:55:56Z"
+    "state": "closed",
+    "title": "fix: hf-doc-builder insallation was failing",
+    "updated_at": "2026-04-03T17:02:09Z"
   },
   {
-    "additions": 57,
-    "author": "zucchini-nlp",
+    "additions": 2,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? If we have videos, the token type ids will be `2` but the current fn checks only image token types. This PR generalizes it rely only on `vision_group_ids` instead of token types",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? Fix for tiny model",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45185",
-    "created_at": "2026-04-02T11:35:53Z",
-    "deletions": 104,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45224",
+    "created_at": "2026-04-03T16:39:23Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45185/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45185",
+    "files_url": "https://github.com/huggingface/transformers/pull/45224/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45224",
     "labels": [],
     "merged": true,
-    "number": 45185,
-    "review_comments_count": 2,
+    "number": 45224,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Generalize gemma vision mask to videos",
-    "updated_at": "2026-04-02T13:15:46Z"
+    "title": "remove unnecessary entries in some auto model mappings",
+    "updated_at": "2026-04-03T17:26:24Z"
   },
   {
-    "additions": 425,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "# Summary This PR adds CPU offloading to continuous batching. It's in raft until perf and test status are reported. When the GPU KV cache is full and a request must be evicted, we check if there is enough VRAM to copy the request's KV cach\u2026",
-    "changed_files": 6,
+    "additions": 17,
+    "author": "Talhax55z",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #31356 ## What does this PR do? The `postprocess` method in `ObjectDetectionPipeline` was hardcoding `raw_annotations[0]`, which caused batch inference to only return results for the first image, ignoring all others. This PR replaces\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45184",
-    "created_at": "2026-04-02T10:12:00Z",
-    "deletions": 52,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45184/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45184",
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45223",
+    "created_at": "2026-04-03T16:36:25Z",
+    "deletions": 15,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45223/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45223",
     "labels": [],
     "merged": false,
-    "number": 45184,
+    "number": 45223,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[CB] [Major] Add CPU request offloading",
-    "updated_at": "2026-04-08T14:16:58Z"
+    "state": "closed",
+    "title": "Fix: ObjectDetectionPipeline batch inference only returns first image results",
+    "updated_at": "2026-04-13T15:06:32Z"
   },
   {
-    "additions": 232,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The `transformers` CLI currently does a lot of work before command dispatch. In particular, the top-level entrypoint eagerly imports `transformers` and CLI subcommands with heavy dependencies, so even simple invocat\u2026",
-    "changed_files": 6,
+    "additions": 12,
+    "author": "jashshah999",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary When using Gemma 3 or Gemma 4 for text-only supervised fine-tuning (no images), the forward pass raises a `ValueError` because `token_type_ids` / `mm_token_type_ids` is not provided. This happens because `AutoTokenizer` does not\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45181",
-    "created_at": "2026-04-02T08:03:40Z",
-    "deletions": 23,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45181/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45181",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45222",
+    "created_at": "2026-04-03T16:27:31Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45222/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45222",
     "labels": [],
     "merged": false,
-    "number": 45181,
+    "number": 45222,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Make the cli a top-level package",
-    "updated_at": "2026-04-09T12:01:41Z"
+    "title": "fix(gemma3, gemma4): default token_type_ids to zeros for text-only training",
+    "updated_at": "2026-04-07T11:47:12Z"
   },
   {
-    "additions": 48,
-    "author": "paulinebm",
+    "additions": 6,
+    "author": "eustlb",
     "author_association": "MEMBER",
-    "body_excerpt": "## \ud83d\udd12 Pin GitHub Actions to commit SHAs This PR pins all GitHub Actions to their exact commit SHA instead of mutable tags or branch names. **Why?** Pinning to a SHA prevents supply chain attacks where a tag (e.g. `v4`) could be moved to poi\u2026",
-    "changed_files": 18,
+    "body_excerpt": "# What does this PR do? As per title",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45180",
-    "created_at": "2026-04-02T08:00:02Z",
-    "deletions": 48,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45221",
+    "created_at": "2026-04-03T15:49:24Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45180/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45180",
+    "files_url": "https://github.com/huggingface/transformers/pull/45221/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45221",
     "labels": [],
-    "merged": true,
-    "number": 45180,
+    "merged": false,
+    "number": 45221,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "\ud83d\udd12 Pin GitHub Actions to commit SHAs",
-    "updated_at": "2026-04-02T09:12:55Z"
+    "state": "open",
+    "title": "user friendly error when loading audio from video",
+    "updated_at": "2026-04-09T14:16:24Z"
   },
   {
-    "additions": 327,
-    "author": "remi-or",
+    "additions": 341,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "## Summary This PR ads minor changes to `cache.update`, updates the memory handler with all new features and refactors a few parts of the code to make it more readable. Cache indexing: - Replace fancy indexing (cache[idx, :, :]) with expli\u2026",
-    "changed_files": 6,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45179",
-    "created_at": "2026-04-02T06:15:08Z",
-    "deletions": 288,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45179/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45179",
-    "labels": [],
-    "merged": true,
-    "number": 45179,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "[CB] Tweaks to update and minor fixes",
-    "updated_at": "2026-04-03T09:36:51Z"
-  },
-  {
-    "additions": 4966,
-    "author": "masoudpz",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 20,
+    "body_excerpt": "# What does this PR do? This PR adds transformers serve compatibility to multimodal models like qwen omni or gemma 4. We add support for audio with chat completion and response though `input_audio` -> the client need to `base64-encode` the\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45176",
-    "created_at": "2026-04-02T00:47:45Z",
-    "deletions": 2,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45220",
+    "created_at": "2026-04-03T14:16:33Z",
+    "deletions": 39,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45176/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45176",
+    "files_url": "https://github.com/huggingface/transformers/pull/45220/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45220",
     "labels": [],
     "merged": false,
-    "number": 45176,
-    "review_comments_count": 0,
+    "number": 45220,
+    "review_comments_count": 9,
     "state": "open",
-    "title": "added efficietvitsam model to HF",
-    "updated_at": "2026-04-02T16:01:34Z"
+    "title": "Multimodal serve support ",
+    "updated_at": "2026-04-14T15:05:13Z"
   },
   {
-    "additions": 49,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "updates the docs with changes from #44796: - added section for `--compile` in the serve optimization docs - added section for `--model-timeout` in the Loading models section (useful when a model is silently kicked off and a user doesn't kn\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? google/gemma-4-26B-A4B-it tp 2, memory is 46G per rank wo the change, drop to about 25G w per rank with the change - text models: @ArthurZucker @Cyrilvallez",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45174",
-    "created_at": "2026-04-01T23:29:39Z",
-    "deletions": 21,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45219",
+    "created_at": "2026-04-03T13:59:02Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45174/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45174",
+    "files_url": "https://github.com/huggingface/transformers/pull/45219/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45219",
     "labels": [],
     "merged": true,
-    "number": 45174,
-    "review_comments_count": 2,
+    "number": 45219,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] transformers serve",
-    "updated_at": "2026-04-02T16:39:12Z"
+    "title": "Add MoE to Gemma4 TP plan",
+    "updated_at": "2026-04-08T14:09:40Z"
   },
   {
-    "additions": 16,
-    "author": "JJJYmmm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixed the `qwen3_5` / `qwen3_5_moe` reverse-loading tests by correcting the text model type used in the setup, and aligned the reverse-mapping behavior with gemma3n since they are all native multimodal. This also re\u2026",
-    "changed_files": 2,
+    "additions": 4057,
+    "author": "LysandreJik",
+    "author_association": "MEMBER",
+    "body_excerpt": "This PR offers a new, Agentic surface for transformers. It tries to apply what is done elsewhere with CLIs to `transformers`, leveraging many current use-cases of `transformers` and exposing them as CLI endpoints. I recommend reading this\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45173",
-    "created_at": "2026-04-01T20:20:37Z",
-    "deletions": 90,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45173/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45173",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45218",
+    "created_at": "2026-04-03T13:31:08Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45218/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45218",
     "labels": [],
-    "merged": true,
-    "number": 45173,
+    "merged": false,
+    "number": 45218,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[misc] fix qwen35 tests: correct the text model type and skip reverse_mapping",
-    "updated_at": "2026-04-02T13:05:39Z"
+    "state": "open",
+    "title": "Proposal: Agent-first CLI",
+    "updated_at": "2026-04-13T13:03:00Z"
   },
   {
-    "additions": 156,
-    "author": "ezylopx5",
+    "additions": 5,
+    "author": "ENg-122",
     "author_association": "NONE",
-    "body_excerpt": "## Problem Transformers currently provides sampling filters such as top-k, top-p, min-p, and top-h, but does not include top-n-sigma sampling from \"Top-n\u03c3: Not All Logits Are You Need\". This makes it harder to use a temperature-invariant t\u2026",
-    "changed_files": 8,
+    "body_excerpt": "# What does this PR do? Remove unnecessary masked_fill_(mask, 0) call in torch_chunk_gated_delta_rule. The decay_mask computed earlier already encodes the causal/lower-triangular structure (upper-triangle values are zero), so masking the a\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45172",
-    "created_at": "2026-04-01T19:25:12Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45215",
+    "created_at": "2026-04-03T09:08:28Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45172/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45172",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 45172,
+    "files_url": "https://github.com/huggingface/transformers/pull/45215/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45215",
+    "labels": [],
+    "merged": true,
+    "number": 45215,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add TopNSigmaLogitsWarper and top_n_sigma generation config support",
-    "updated_at": "2026-04-04T06:24:26Z"
+    "title": "[Qwen3_5]Remove unnecessary masked_fill_ in torch_chunk_gated_delta_rule attention computation: \"attn = (q_i @ k_i.transpose(-1, -2) * decay_mask[:, :, i]).masked_fill_(mask, 0)\"",
+    "updated_at": "2026-04-09T14:12:51Z"
   },
   {
-    "additions": 134,
-    "author": "Kash6",
+    "additions": 45,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "When input_boxes contains a mix of None and real box entries (e.g. input_boxes=[None, [[x1,y1,x2,y2]]]), the processor pads None entries with [-10,-10,0,0] but does not generate corresponding input_boxes_labels. The model's geometry encode\u2026",
-    "changed_files": 2,
+    "body_excerpt": "This PR fixes failed test case: `tests/models/cohere_asr/test_modeling_cohere_asr.py::CohereAsrModelTest::test_model_parallel_beam_search`, and add some adjustment to make the test cases pass for Intel XPU device. @ydshieh pls help review,\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45171",
-    "created_at": "2026-04-01T18:26:48Z",
-    "deletions": 0,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45214",
+    "created_at": "2026-04-03T08:32:34Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45171/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45171",
+    "files_url": "https://github.com/huggingface/transformers/pull/45214/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45214",
     "labels": [],
-    "merged": false,
-    "number": 45171,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix Sam3Processor missing input_boxes_labels for padded None entries",
-    "updated_at": "2026-04-08T23:23:47Z"
+    "merged": true,
+    "number": 45214,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "cohere_asr: fix bug for model_parallel_beam_search test case",
+    "updated_at": "2026-04-13T02:40:29Z"
   },
   {
-    "additions": 19,
-    "author": "zucchini-nlp",
+    "additions": 6052,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Before I forget, opening a PR. Will cause conflicts in https://github.com/huggingface/transformers/pull/44431, so I will better merge this after refactoring",
-    "changed_files": 10,
+    "body_excerpt": "comparison https://github.com/huggingface/transformers/blob/937d61b9fa00001da1a0680ecf8061b5990fbcd7/sarvam_moe_comparison.md # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once mer\u2026",
+    "changed_files": 26,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45170",
-    "created_at": "2026-04-01T17:50:33Z",
-    "deletions": 17,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45170/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45170",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45213",
+    "created_at": "2026-04-03T08:25:49Z",
+    "deletions": 209,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45213/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45213",
     "labels": [],
     "merged": false,
-    "number": 45170,
+    "number": 45213,
     "review_comments_count": 0,
     "state": "open",
-    "title": "`layrnorm` -> `layernorm`",
-    "updated_at": "2026-04-01T18:03:31Z"
+    "title": "DO NOT MERGE - model creation skill",
+    "updated_at": "2026-04-03T12:26:09Z"
   },
   {
-    "additions": 10,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "In https://github.com/huggingface/transformers/pull/45094 I introduced some errors to the remote code resolution when trying to detect if local code belonged to Transformers or not. These tests were: ```bash pytest tests/models/cohere_asr/\u2026",
+    "additions": 29,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh pls help review, thx!",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45169",
-    "created_at": "2026-04-01T15:27:43Z",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45212",
+    "created_at": "2026-04-03T07:44:35Z",
     "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45169/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45169",
+    "files_url": "https://github.com/huggingface/transformers/pull/45212/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45212",
     "labels": [],
     "merged": true,
-    "number": 45169,
+    "number": 45212,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix explicit local code resolution for tokenizers and image processors",
-    "updated_at": "2026-04-01T21:48:02Z"
+    "title": "musicflamingo: add test support for Intel XPU device",
+    "updated_at": "2026-04-10T10:41:20Z"
   },
   {
     "additions": 4,
-    "author": "w601sxs",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Based on our experimentation min and max lr for LLMs need to be set properly as defaults. Please refer to paper. For the broader community 1e-7 to 1e-4 are decent defaults # What does this PR do? <!-- Congratulations! You've made it this f\u2026",
-    "changed_files": 1,
+    "author": "matdou",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #45208 # What does this PR do? This PR corrects an incorrect return type in `Qwen3MoeSparseMoeBlock.forward`. The method was annotated as returning `tuple[torch.Tensor, torch.Tensor]`, while the implementation returns a `torch.Tensor\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45168",
-    "created_at": "2026-04-01T15:02:15Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45211",
+    "created_at": "2026-04-03T07:44:32Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45168/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45168",
-    "labels": [],
-    "merged": false,
-    "number": 45168,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Update min_lr and max_lr default values to better defaults",
-    "updated_at": "2026-04-08T16:58:59Z"
-  },
-  {
-    "additions": 16,
-    "author": "xu-song",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds support for [Anthropic's JSON function style](https://platform.claude.com/docs/en/agents-and-tools/tool-use/define-tools): `{\"name\": \"...\", \"description\": \"...\", \"input_schema\": {...}}` ## Usage Example\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45167",
-    "created_at": "2026-04-01T14:50:33Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45167/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45167",
+    "files_url": "https://github.com/huggingface/transformers/pull/45211/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45211",
     "labels": [],
     "merged": false,
-    "number": 45167,
+    "number": 45211,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add anthropic style of function schema",
-    "updated_at": "2026-04-03T05:49:56Z"
+    "state": "closed",
+    "title": "[Qwen3MoE] Fix wrong return type annotation in Qwen3MoeSparseMoeBlock.forward",
+    "updated_at": "2026-04-08T19:20:49Z"
   },
   {
-    "additions": 67,
-    "author": "Rocketknight1",
+    "additions": 11,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "We didn't think we needed them, but I think we do after all!",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Commits that got in the release branch to allow pushing",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45166",
-    "created_at": "2026-04-01T14:46:33Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45210",
+    "created_at": "2026-04-03T05:42:21Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45166/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45166",
+    "files_url": "https://github.com/huggingface/transformers/pull/45210/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45210",
     "labels": [],
     "merged": true,
-    "number": 45166,
+    "number": 45210,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Re-add regex substitutions to the response parsing spec",
-    "updated_at": "2026-04-01T15:46:34Z"
+    "title": "Fix pypi release",
+    "updated_at": "2026-04-03T06:40:39Z"
   },
   {
-    "additions": 11,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Emu3 was not updated in recent refactor and blip files were swapped. This PR fixes it Do we need anything to support BC importing from old files, or does it happen in `LazyImports` @yonigozlan ?",
-    "changed_files": 3,
+    "additions": 3,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh pls help review, thx!",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45165",
-    "created_at": "2026-04-01T14:18:38Z",
-    "deletions": 6,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45209",
+    "created_at": "2026-04-03T05:40:36Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45165/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45165",
+    "files_url": "https://github.com/huggingface/transformers/pull/45209/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45209",
     "labels": [],
     "merged": true,
-    "number": 45165,
-    "review_comments_count": 0,
+    "number": 45209,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix missing image processors backends",
-    "updated_at": "2026-04-07T13:46:24Z"
+    "title": "nomic_bert: make the test suitable for general device.",
+    "updated_at": "2026-04-13T02:40:26Z"
   },
   {
-    "additions": 1,
-    "author": "albertvillanova",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix TypeError: 'NoneType' object is not iterable in `GenerationMixin.generate` - Fix for None layer_types <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is goin\u2026",
-    "changed_files": 1,
+    "additions": 65,
+    "author": "w4nderlust",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #45206 ## What does this PR do? Adds documentation for the Gemma4 Per-Layer Embeddings (PLE) system, which is currently pretty hard to reverse-engineer from the code alone. I ran into this while implementing Gemma4 inference from scr\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45164",
-    "created_at": "2026-04-01T13:53:41Z",
-    "deletions": 1,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45207",
+    "created_at": "2026-04-03T05:15:47Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45164/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45164",
+    "files_url": "https://github.com/huggingface/transformers/pull/45207/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45207",
     "labels": [],
-    "merged": true,
-    "number": 45164,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix TypeError: 'NoneType' object is not iterable in GenerationMixin.generate",
-    "updated_at": "2026-04-03T04:57:18Z"
+    "merged": false,
+    "number": 45207,
+    "review_comments_count": 6,
+    "state": "open",
+    "title": "[Gemma4] Add docstrings for Per-Layer Embeddings (PLE) pipeline",
+    "updated_at": "2026-04-13T19:48:20Z"
   },
   {
-    "additions": 131,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Makes sure the full error is displayed on errors",
-    "changed_files": 2,
+    "additions": 112,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh pls help review, thx!",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45163",
-    "created_at": "2026-04-01T13:41:18Z",
-    "deletions": 20,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45204",
+    "created_at": "2026-04-03T02:32:39Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45163/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45163",
+    "files_url": "https://github.com/huggingface/transformers/pull/45204/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45204",
     "labels": [],
     "merged": true,
-    "number": 45163,
-    "review_comments_count": 0,
+    "number": 45204,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "tweak checkers output on errors",
-    "updated_at": "2026-04-09T11:50:16Z"
+    "title": "fix bug for videomt model device mismatch",
+    "updated_at": "2026-04-13T02:40:27Z"
   },
   {
-    "additions": 513,
-    "author": "onwp",
+    "additions": 2,
+    "author": "Qubitium",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Description Re-creates the Turkish documentation that was accidentally removed during the TF/Flax cleanup (commit fce74651). This PR adds the foundational Turkish docs with the complete \"Get Started\" section. ### Files added - `docs/sou\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? Disable FlashAttention support for Gemm4 which FA cannot suport due to global.head-dim=512. I am very confused at the current code/test for Gemma4. I ran real inference using transformer `main` and `fa` throws head-\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45159",
-    "created_at": "2026-04-01T05:15:32Z",
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45202",
+    "created_at": "2026-04-02T23:37:22Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45159/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45159",
+    "files_url": "https://github.com/huggingface/transformers/pull/45202/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45202",
     "labels": [],
     "merged": false,
-    "number": 45159,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add Turkish documentation: Get Started section",
-    "updated_at": "2026-04-01T05:16:49Z"
+    "number": 45202,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "Fix gemma4 has flash-attention incompatbile head-dim=512",
+    "updated_at": "2026-04-12T03:39:09Z"
   },
   {
-    "additions": 525,
-    "author": "onwp",
+    "additions": 18,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary This PR re-creates the Turkish (`tr`) documentation for the Transformers library, starting with the **Get Started** section. The original Turkish translation was accidentally removed in commit fce74651 (#40999). This contributio\u2026",
-    "changed_files": 6,
+    "body_excerpt": "### What does this PR do? The following **Wav2Vec2PhonemeCTC** use cases were identified and fixed in this PR: \u2192 [05c0e1d (\"rm slow tokenizers\")](https://github.com/huggingface/transformers/pull/40936) added [self.backend = kwargs.pop(\"bac\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45158",
-    "created_at": "2026-04-01T05:05:40Z",
-    "deletions": 1,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45199",
+    "created_at": "2026-04-02T20:03:22Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45158/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45158",
+    "files_url": "https://github.com/huggingface/transformers/pull/45199/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45199",
     "labels": [],
     "merged": true,
-    "number": 45158,
-    "review_comments_count": 0,
+    "number": 45199,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Add Turkish (tr) translation for Get Started section",
-    "updated_at": "2026-04-02T17:50:46Z"
+    "title": "fix(models): Resolve regressions in Wav2Vec2PhonemeCTCTokenizer (wav2vec2-lv-60-espeak-cv-ft)",
+    "updated_at": "2026-04-14T14:00:01Z"
   },
   {
-    "additions": 190,
-    "author": "Qubitium",
+    "additions": 104,
+    "author": "douglas-reid",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Add PrismML 1bit (gguf based, group-size 128) model inference suppport. ## Code Agent Policy - [ ] I confirm that this is not a pure code agent PR. ## Before submitting - [ ] This PR fixes a typo or improves the doc\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45157",
-    "created_at": "2026-04-01T03:11:51Z",
-    "deletions": 23,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45157/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45157",
-    "labels": [],
-    "merged": false,
-    "number": 45157,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[WIP] PrismML Bonsai model support",
-    "updated_at": "2026-04-02T20:53:12Z"
-  },
-  {
-    "additions": 50,
-    "author": "Cursx",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? After the `merge_and_unload()` operation in PEFT, embed_tokens and lm_head become independent tensors with different values, but config.tie_word_embeddings remains True. The load-side already detects this using torc\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Fixes issues with the Gemma 4 model docs. Mainly, this is updating examples to point at the actual models, with FC and Audio examples added. - [ X ] I confirm that this is not a pure code agent PR. ## Before submitting - [ X ] This PR fixe\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45156",
-    "created_at": "2026-04-01T02:36:38Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45156/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45156",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45197",
+    "created_at": "2026-04-02T19:05:35Z",
+    "deletions": 42,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45197/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45197",
     "labels": [],
-    "merged": false,
-    "number": 45156,
+    "merged": true,
+    "number": 45197,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix save_pretrained writing incorrect tie_word_embeddings=True config after PEFT merge",
-    "updated_at": "2026-04-01T09:34:05Z"
+    "title": "fix(docs): correct gemma4 docs and examples",
+    "updated_at": "2026-04-02T22:23:16Z"
   },
   {
-    "additions": 46,
-    "author": "michaelbenayoun",
+    "additions": 1,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add support for loading adapters with `PreTrainedModel.load_adapter` when using TP. See: https://github.com/huggingface/peft/pull/3096",
+    "body_excerpt": "fixes `<hfoptions>` tag",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45155",
-    "created_at": "2026-03-31T22:23:06Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45196",
+    "created_at": "2026-04-02T18:02:44Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45155/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45155",
+    "files_url": "https://github.com/huggingface/transformers/pull/45196/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45196",
     "labels": [],
     "merged": true,
-    "number": 45155,
-    "review_comments_count": 2,
+    "number": 45196,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Load adapter with TP",
-    "updated_at": "2026-04-09T22:35:45Z"
+    "title": "[docs] formatting",
+    "updated_at": "2026-04-03T09:37:52Z"
   },
   {
-    "additions": 676,
-    "author": "FaizanImran-blip",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed bug #45072 / #45071 where PretrainedConfig type checking and Pydantic model field validation were broken in v5.4.0. Added proper type checking for 'num_labels'. Added unit tests in test.py to verify correct and incorrect types. Verif\u2026",
+    "additions": 90,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What this PR does Adds a new `load_image_as_tensor` utility leveraging torchvision's `decode_image` to `image_utils.py` and overrides `fetch_images` in `TorchvisionBackend` to use it. Previously, all image loading went through PIL regard\u2026",
     "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45154",
-    "created_at": "2026-03-31T20:28:32Z",
-    "deletions": 153,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45195",
+    "created_at": "2026-04-02T17:58:51Z",
+    "deletions": 20,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45154/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45154",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 45154,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/45195/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45195",
+    "labels": [],
+    "merged": true,
+    "number": 45195,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Pretrained-config bug(45072/huggingfacebug)",
-    "updated_at": "2026-04-09T11:43:55Z"
+    "title": " Use torchvision `decode_image` to load images in the torchvision backend",
+    "updated_at": "2026-04-09T17:08:04Z"
   },
   {
-    "additions": 502,
-    "author": "vasqu",
+    "additions": 222,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title, with torch releasing the varlen API, we can somewhat use native FA (with limited feature support) Restrictions - Unsupported features - Dropout - Learnable sinks (attention sinks) - Determinism - Softcap - CB KV cache native\u2026",
-    "changed_files": 19,
+    "body_excerpt": "# What does this PR do? Fixes: - Replace `PretrainedConfig` with `PreTrainedConfig` - Don't import from other models inside `configuration_model.py`, instead resolve via modular - Text/vision sub-configs are documented (only kosmos was mis\u2026",
+    "changed_files": 23,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45153",
-    "created_at": "2026-03-31T19:43:19Z",
-    "deletions": 386,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45153/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45153",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45194",
+    "created_at": "2026-04-02T16:39:06Z",
+    "deletions": 40,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45194/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45194",
     "labels": [],
     "merged": false,
-    "number": 45153,
-    "review_comments_count": 25,
-    "state": "open",
-    "title": "[`FA`] Native torch integration",
-    "updated_at": "2026-04-01T20:01:16Z"
+    "number": 45194,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Configuration insoncistencies",
+    "updated_at": "2026-04-10T11:39:18Z"
   },
   {
-    "additions": 389,
-    "author": "stevhliu",
+    "additions": 15,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "refactors the testing docs to be more contributor-facing organized around writing model tests instead of being a collection of pytest usage examples and CI maintenance. also updates the pr checks doc so contributors are better prepared to\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45070 Though I am convinced that configs should not be pyndatic BaseClass, just because we already wrap all subclasses as dataclass. You never know what happe\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45152",
-    "created_at": "2026-03-31T18:12:09Z",
-    "deletions": 1363,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45193",
+    "created_at": "2026-04-02T15:36:56Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45152/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45152",
+    "files_url": "https://github.com/huggingface/transformers/pull/45193/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45193",
     "labels": [],
     "merged": false,
-    "number": 45152,
-    "review_comments_count": 6,
+    "number": 45193,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "[docs] model testing",
-    "updated_at": "2026-04-13T07:09:25Z"
+    "title": "Config can apply pyndatic validation without torch-dependence",
+    "updated_at": "2026-04-02T17:00:43Z"
   },
   {
-    "additions": 1,
-    "author": "maanas1234",
+    "additions": 9896,
+    "author": "RyanMullins",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? The previous code snippet had an error in how `TrainingArguments` was being used, which could lead to confusion or incorrect implementation. Corrected the example to properly demonstrate how to initialize and use `T\u2026",
-    "changed_files": 1,
+    "body_excerpt": "--------- # What does this PR do? model previously unable to use tools ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenec\u2026",
+    "changed_files": 41,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45150",
-    "created_at": "2026-03-31T17:31:37Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45192",
+    "created_at": "2026-04-02T14:35:18Z",
+    "deletions": 79,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45150/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45150",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45192/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45192",
+    "labels": [
+      "New model"
+    ],
     "merged": true,
-    "number": 45150,
-    "review_comments_count": 1,
+    "number": 45192,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix incorrect TrainingArguments example in training.md",
-    "updated_at": "2026-03-31T18:31:25Z"
+    "title": "casually dropping the most capable open weights on the planet",
+    "updated_at": "2026-04-03T12:28:06Z"
   },
   {
-    "additions": 4633,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "First pass: ~1M+ tokens in, ~115K+ out, Opus mainly, $42, 1h30 # PR #44320 vs Our Implementation ## What we got right - Same overall structure: modular file + generated standalone files + conversion script + tests + docs - Same model direc\u2026",
-    "changed_files": 20,
+    "additions": 13,
+    "author": "saslifat-gif",
+    "author_association": "NONE",
+    "body_excerpt": "The Qwen2 tokenizer test file had no custom test methods \u2014 only integration constants inherited from TokenizerTesterMixin. This PR adds a test documenting two untested edge cases in decode(): **Before (no test, behavior undocumented):** ``\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45149",
-    "created_at": "2026-03-31T15:56:11Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45149/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45149",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45191",
+    "created_at": "2026-04-02T14:08:15Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45191/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45191",
     "labels": [],
     "merged": false,
-    "number": 45149,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "DO NOT MERGE adding SAML3-LiteText with a skill, first pass",
-    "updated_at": "2026-04-01T06:28:20Z"
-  },
-  {
-    "additions": 13,
-    "author": "HallerPatrick",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #45146 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review and r\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45148",
-    "created_at": "2026-03-31T15:20:02Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45148/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45148",
-    "labels": [],
-    "merged": false,
-    "number": 45148,
+    "number": 45191,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Allow for all layers in Qwen3.5 architecture to be Gated Deltanet.",
-    "updated_at": "2026-04-02T11:18:12Z"
+    "title": "Add edge case tests for out-of-range token id decoding in Qwen2 tokenizer",
+    "updated_at": "2026-04-08T12:18:56Z"
   },
   {
-    "additions": 309,
-    "author": "mobicham",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR fixes hqq support that has been broken for a couple of months now after a refactoring: * Online quantization works fine now. * Serialization to load/save HQQ models is fixed too. ## Code Agent Policy - [x] I\u2026",
-    "changed_files": 3,
+    "additions": 92,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This patch extends ty check to src/transformers/cli. Supersedes https://github.com/huggingface/transformers/pull/44566. I've added some of the changes in this PR",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45147",
-    "created_at": "2026-03-31T14:59:47Z",
-    "deletions": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45190",
+    "created_at": "2026-04-02T13:54:23Z",
+    "deletions": 32,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45147/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45147",
+    "files_url": "https://github.com/huggingface/transformers/pull/45190/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45190",
     "labels": [],
     "merged": false,
-    "number": 45147,
-    "review_comments_count": 8,
+    "number": 45190,
+    "review_comments_count": 18,
     "state": "open",
-    "title": "Fix broken HQQ support",
-    "updated_at": "2026-04-09T16:14:06Z"
+    "title": "Fix ty for transformers cli",
+    "updated_at": "2026-04-14T14:32:12Z"
   },
   {
-    "additions": 2325,
-    "author": "casinca",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Hello, this PR aims to add the MiMo-V2-Flash model to the Transformers library Fixes https://github.com/huggingface/transformers/issues/42954 MiMo-V2 is \"The last of the SOTAs\" that isn't natively supported by the T\u2026",
-    "changed_files": 12,
+    "additions": 409,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - Add two optional, backward-compatible inputs (`test_path_prefix`, `test_file_suffix`) to `model_jobs.yml` and `self-scheduled.yml` \u2014 defaults preserve all existing behavior - Extend the `set-matrix` step in `self-scheduled.yml\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45144",
-    "created_at": "2026-03-31T13:53:28Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45189",
+    "created_at": "2026-04-02T13:43:29Z",
+    "deletions": 185,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45144/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45144",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45189/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45189",
+    "labels": [],
     "merged": false,
-    "number": 45144,
+    "number": 45189,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add Xiaomi MiMo-V2",
-    "updated_at": "2026-04-08T20:56:21Z"
+    "title": "Add doc test CI workflow reusing existing model job infrastructure",
+    "updated_at": "2026-04-03T07:30:17Z"
   },
   {
-    "additions": 102,
-    "author": "Rocketknight1",
+    "additions": 5,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR adds `parse_response` to Processor classes by wrapping the `Tokenizer` method! cc @zucchini-nlp",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do ? This PR fixes the `test_register_result_handler`. Not sure how it passed in the past when i added it but since CB returns `generated_tokens` from the same list to avoid copy, len(results[i].generated_tokens) for i\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45143",
-    "created_at": "2026-03-31T13:11:49Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45188",
+    "created_at": "2026-04-02T13:15:57Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45143/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45143",
+    "files_url": "https://github.com/huggingface/transformers/pull/45188/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45188",
     "labels": [],
     "merged": true,
-    "number": 45143,
-    "review_comments_count": 4,
+    "number": 45188,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add parse_response to Processor, make it a bit more official",
-    "updated_at": "2026-03-31T17:07:16Z"
+    "title": "fix `test_register_result_handler`",
+    "updated_at": "2026-04-03T09:37:31Z"
   },
   {
-    "additions": 4,
-    "author": "casinca",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Close file handler.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45142",
-    "created_at": "2026-03-31T12:47:46Z",
-    "deletions": 4,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45187",
+    "created_at": "2026-04-02T13:10:31Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45142/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45142",
+    "files_url": "https://github.com/huggingface/transformers/pull/45187/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45187",
     "labels": [],
-    "merged": false,
-    "number": 45142,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 45187,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "refactor(gpt-oss): rename `eager_attention_forward` to `eager_attention_forward_with_sink`",
-    "updated_at": "2026-04-02T16:44:14Z"
+    "title": "Close file handler",
+    "updated_at": "2026-04-10T11:31:40Z"
   },
   {
-    "additions": 19,
-    "author": "ydshieh",
+    "additions": 7248,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Really stupid ... Currently, change a docstring will count as \"modified files\", then the impacted files and test files to run are computed. Say, we add a comment to bert, many files impacted (via dependency) then ma\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Same as https://github.com/huggingface/transformers/pull/40962 but cleans up the code to match transformers API. Couldn't test due to errors, the integration test is failing atm. Still need to clean the testing file\u2026",
+    "changed_files": 21,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45140",
-    "created_at": "2026-03-31T09:41:28Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45186",
+    "created_at": "2026-04-02T12:29:46Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45140/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45140",
+    "files_url": "https://github.com/huggingface/transformers/pull/45186/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45186",
     "labels": [],
-    "merged": true,
-    "number": 45140,
+    "merged": false,
+    "number": 45186,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix stupid test fetcher",
-    "updated_at": "2026-03-31T11:06:51Z"
+    "state": "open",
+    "title": "Add new model: Isaac ",
+    "updated_at": "2026-04-02T12:55:56Z"
   },
   {
-    "additions": 757,
-    "author": "ArthurZucker",
+    "additions": 57,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? More fixes",
-    "changed_files": 99,
+    "body_excerpt": "# What does this PR do? If we have videos, the token type ids will be `2` but the current fn checks only image token types. This PR generalizes it rely only on `vision_group_ids` instead of token types",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45139",
-    "created_at": "2026-03-31T09:26:28Z",
-    "deletions": 976,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45185",
+    "created_at": "2026-04-02T11:35:53Z",
+    "deletions": 104,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45139/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45139",
+    "files_url": "https://github.com/huggingface/transformers/pull/45185/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45185",
     "labels": [],
     "merged": true,
-    "number": 45139,
-    "review_comments_count": 26,
+    "number": 45185,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix vllm cis",
-    "updated_at": "2026-04-08T11:19:41Z"
+    "title": "Generalize gemma vision mask to videos",
+    "updated_at": "2026-04-02T13:15:46Z"
   },
   {
-    "additions": 2,
-    "author": "Abdennacer-Badaoui",
+    "additions": 425,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "This is a small T5 expectations update. It is the same for both AMD and NVIDIA A10 GPUs.",
-    "changed_files": 1,
+    "body_excerpt": "# Summary This PR adds CPU offloading to continuous batching. It's in raft until perf and test status are reported. When the GPU KV cache is full and a request must be evicted, we check if there is enough VRAM to copy the request's KV cach\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45138",
-    "created_at": "2026-03-31T08:41:40Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45138/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45138",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45184",
+    "created_at": "2026-04-02T10:12:00Z",
+    "deletions": 52,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45184/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45184",
     "labels": [],
-    "merged": true,
-    "number": 45138,
+    "merged": false,
+    "number": 45184,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "CI] Small T5 expectations updated",
-    "updated_at": "2026-04-02T08:21:25Z"
+    "state": "open",
+    "title": "[CB] [Major] Add CPU request offloading",
+    "updated_at": "2026-04-08T14:16:58Z"
   },
   {
-    "additions": 83,
-    "author": "Cursx",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? This PR fixes a bug in `PreTrainedModel.save_pretrained()` where `config.tie_word_embeddings` can be inconsistent with the actual weight state, leading to silent model corruption for downstream consumers. ### Proble\u2026",
-    "changed_files": 2,
+    "additions": 232,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The `transformers` CLI currently does a lot of work before command dispatch. In particular, the top-level entrypoint eagerly imports `transformers` and CLI subcommands with heavy dependencies, so even simple invocat\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45136",
-    "created_at": "2026-03-31T06:45:53Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45136/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45136",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 45136,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix #45127: Auto-fix diverged tie_word_embeddings config on save to prevent silent weight corruption",
-    "updated_at": "2026-03-31T12:21:32Z"
-  },
-  {
-    "additions": 44,
-    "author": "Cursx",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes an issue where PEFT adapters applied independently to tied embeddings (`embed_tokens` and [lm_head](cci:1://file:///d:/transformers/transformers/src/transformers/modeling_utils.py:2858:4-2985:26)) cause silent\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45135",
-    "created_at": "2026-03-31T02:28:26Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45135/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45135",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45181",
+    "created_at": "2026-04-02T08:03:40Z",
+    "deletions": 23,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45181/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45181",
     "labels": [],
     "merged": false,
-    "number": 45135,
+    "number": 45181,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix model saving corruption for dynamically untied embeddings",
-    "updated_at": "2026-03-31T04:36:56Z"
+    "state": "open",
+    "title": "Make the cli a top-level package",
+    "updated_at": "2026-04-09T12:01:41Z"
   },
   {
-    "additions": 91,
-    "author": "milesial",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Add support for CUDA parakeet preprocessor, running STFT and mel spectrogram extraction on the GPU. This refactor also speeds up the CPU implementation. Tested on `nvidia/parakeet-ctc-0.6b`, B200, 300s audio: Before\u2026",
-    "changed_files": 1,
+    "additions": 48,
+    "author": "paulinebm",
+    "author_association": "MEMBER",
+    "body_excerpt": "## \ud83d\udd12 Pin GitHub Actions to commit SHAs This PR pins all GitHub Actions to their exact commit SHA instead of mutable tags or branch names. **Why?** Pinning to a SHA prevents supply chain attacks where a tag (e.g. `v4`) could be moved to poi\u2026",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45134",
-    "created_at": "2026-03-31T01:59:28Z",
-    "deletions": 56,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45180",
+    "created_at": "2026-04-02T08:00:02Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45134/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45134",
+    "files_url": "https://github.com/huggingface/transformers/pull/45180/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45180",
     "labels": [],
-    "merged": false,
-    "number": 45134,
+    "merged": true,
+    "number": 45180,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Optimize Parakeet feature extraction on CUDA",
-    "updated_at": "2026-04-07T14:56:51Z"
+    "state": "closed",
+    "title": "\ud83d\udd12 Pin GitHub Actions to commit SHAs",
+    "updated_at": "2026-04-02T09:12:55Z"
   },
   {
-    "additions": 1944,
-    "author": "itazap",
+    "additions": 327,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written via HF Inference API guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test\u2026",
-    "changed_files": 4,
+    "body_excerpt": "## Summary This PR ads minor changes to `cache.update`, updates the memory handler with all new features and refactors a few parts of the code to make it more readable. Cache indexing: - Replace fancy indexing (cache[idx, :, :]) with expli\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45133",
-    "created_at": "2026-03-31T00:00:15Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45133/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45133",
-    "labels": [],
-    "merged": false,
-    "number": 45133,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Add sarvam model",
-    "updated_at": "2026-03-31T00:09:59Z"
-  },
-  {
-    "additions": 215,
-    "author": "akintunero",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary This PR fixes GitHub issue #45120: \"Double softmax in MoE router load-balancing loss\". MoE routers in Mixtral, Qwen2MoE, and Qwen3VLMoE were applying softmax inside forward(), then the load_balancing_loss_func applied softmax AG\u2026",
-    "changed_files": 9,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45132",
-    "created_at": "2026-03-30T22:45:01Z",
-    "deletions": 34,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45179",
+    "created_at": "2026-04-02T06:15:08Z",
+    "deletions": 288,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45132/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45132",
+    "files_url": "https://github.com/huggingface/transformers/pull/45179/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45179",
     "labels": [],
-    "merged": false,
-    "number": 45132,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 45179,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Fix: Remove double softmax in MoE router load-balancing loss (Mixtral, Qwen2MoE, Qwen3VLMoE)",
-    "updated_at": "2026-03-30T23:36:57Z"
+    "title": "[CB] Tweaks to update and minor fixes",
+    "updated_at": "2026-04-03T09:36:51Z"
   },
   {
-    "additions": 30,
-    "author": "yacinemebarki",
+    "additions": 4966,
+    "author": "masoudpz",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes issue #45120: Several MoE routers returned softmaxed probabilities as `router_logits`, which caused `load_balancing_loss_func` to compute softmax(softmax(logits)), flattening routing distributions and weakenin\u2026",
-    "changed_files": 13,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45131",
-    "created_at": "2026-03-30T21:18:47Z",
-    "deletions": 30,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45176",
+    "created_at": "2026-04-02T00:47:45Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45131/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45131",
+    "files_url": "https://github.com/huggingface/transformers/pull/45176/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45176",
     "labels": [],
     "merged": false,
-    "number": 45131,
+    "number": 45176,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix MoE routers returning probabilities instead of logits",
-    "updated_at": "2026-04-13T09:45:14Z"
+    "title": "added efficietvitsam model to HF",
+    "updated_at": "2026-04-02T16:01:34Z"
   },
   {
-    "additions": 120,
+    "additions": 49,
     "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "updates the `@auto_docstring` docs as part of the process of improving the model addition docs - updates title to be more precise as it can mean documenting a model in a `model.md` file as well - flat hierarchy before \u2192 each component-type\u2026",
+    "body_excerpt": "updates the docs with changes from #44796: - added section for `--compile` in the serve optimization docs - added section for `--model-timeout` in the Loading models section (useful when a model is silently kicked off and a user doesn't kn\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45130",
-    "created_at": "2026-03-30T20:14:33Z",
-    "deletions": 152,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45174",
+    "created_at": "2026-04-01T23:29:39Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45130/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45130",
+    "files_url": "https://github.com/huggingface/transformers/pull/45174/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45174",
     "labels": [],
-    "merged": false,
-    "number": 45130,
+    "merged": true,
+    "number": 45174,
     "review_comments_count": 2,
-    "state": "open",
-    "title": "[docs] @auto_docstring decorator",
-    "updated_at": "2026-04-09T01:16:52Z"
+    "state": "closed",
+    "title": "[docs] transformers serve",
+    "updated_at": "2026-04-02T16:39:12Z"
   },
   {
-    "additions": 5,
-    "author": "IgnazioDS",
+    "additions": 16,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixed the `qwen3_5` / `qwen3_5_moe` reverse-loading tests by correcting the text model type used in the setup, and aligned the reverse-mapping behavior with gemma3n since they are all native multimodal. This also re\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45173",
+    "created_at": "2026-04-01T20:20:37Z",
+    "deletions": 90,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45173/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45173",
+    "labels": [],
+    "merged": true,
+    "number": 45173,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[misc] fix qwen35 tests: correct the text model type and skip reverse_mapping",
+    "updated_at": "2026-04-02T13:05:39Z"
+  },
+  {
+    "additions": 156,
+    "author": "ezylopx5",
     "author_association": "NONE",
-    "body_excerpt": "## Problem Fixes #45070. `PreTrainedConfig.dtype` was annotated as `Union[str, \"torch.dtype\"] | None`. Since `torch` is only imported under `TYPE_CHECKING`, pydantic's schema builder encounters the `\"torch.dtype\"` forward reference at runt\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## Problem Transformers currently provides sampling filters such as top-k, top-p, min-p, and top-h, but does not include top-n-sigma sampling from \"Top-n\u03c3: Not All Logits Are You Need\". This makes it harder to use a temperature-invariant t\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45129",
-    "created_at": "2026-03-30T19:13:52Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45172",
+    "created_at": "2026-04-01T19:25:12Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45129/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45129",
+    "files_url": "https://github.com/huggingface/transformers/pull/45172/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45172",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 45129,
+    "number": 45172,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(config): annotate PreTrainedConfig.dtype as Any to fix pydantic schema generation (#45070)",
-    "updated_at": "2026-03-31T12:33:39Z"
+    "title": "Add TopNSigmaLogitsWarper and top_n_sigma generation config support",
+    "updated_at": "2026-04-04T06:24:26Z"
   },
   {
-    "additions": 443,
-    "author": "FaizanImran-blip",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR fixes a crash in `_process_kwargs_parameters` that occurred when a module uses `from __future__ import annotations`. - Prevents AttributeError crash. - Adds a test `test_future_annotations.py` to verify the fix works. No other func\u2026",
-    "changed_files": 3,
+    "additions": 134,
+    "author": "Kash6",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "When input_boxes contains a mix of None and real box entries (e.g. input_boxes=[None, [[x1,y1,x2,y2]]]), the processor pads None entries with [-10,-10,0,0] but does not generate corresponding input_boxes_labels. The model's geometry encode\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45128",
-    "created_at": "2026-03-30T19:10:48Z",
-    "deletions": 95,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45171",
+    "created_at": "2026-04-01T18:26:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45128/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45128",
+    "files_url": "https://github.com/huggingface/transformers/pull/45171/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45171",
     "labels": [],
     "merged": false,
-    "number": 45128,
+    "number": 45171,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix: handle future annotations in _process_kwargs_parameters",
-    "updated_at": "2026-03-31T10:44:40Z"
+    "title": "Fix Sam3Processor missing input_boxes_labels for padded None entries",
+    "updated_at": "2026-04-08T23:23:47Z"
   },
   {
-    "additions": 186,
-    "author": "tarekziade",
+    "additions": 19,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This patch adds exponential back-off for `load_audio_librosa` / `load_audio_as` / `load_audio_torchcodec` when they try to download a file. That generic utility is also used within `hub_retry` notice that there were\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? Before I forget, opening a PR. Will cause conflicts in https://github.com/huggingface/transformers/pull/44431, so I will better merge this after refactoring",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45126",
-    "created_at": "2026-03-30T16:34:40Z",
-    "deletions": 38,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45126/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45126",
-    "labels": [],
-    "merged": true,
-    "number": 45126,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "http retries on audio file downloads",
-    "updated_at": "2026-04-09T15:12:40Z"
-  },
-  {
-    "additions": 3,
-    "author": "danielquintas8",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds `_tp_plan = {\"lm_head\": \"colwise_gather_output\"}` to `Qwen3_5MoeForConditionalGeneration` (the VL wrapper class). The text-only `Qwen3_5MoeForCausalLM` already had `_tp_plan`, but the VL variant was missing it.\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45124",
-    "created_at": "2026-03-30T16:23:11Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45170",
+    "created_at": "2026-04-01T17:50:33Z",
+    "deletions": 17,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45124/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45124",
+    "files_url": "https://github.com/huggingface/transformers/pull/45170/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45170",
     "labels": [],
-    "merged": true,
-    "number": 45124,
+    "merged": false,
+    "number": 45170,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[Qwen3.5 MoE] Add _tp_plan to ForConditionalGeneration",
-    "updated_at": "2026-04-02T14:10:01Z"
+    "state": "open",
+    "title": "`layrnorm` -> `layernorm`",
+    "updated_at": "2026-04-01T18:03:31Z"
   },
   {
-    "additions": 1,
-    "author": "Rocketknight1",
+    "additions": 10,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "The `test_ocr_queries` assertion value was wrong, even at the initial commit! I'm not sure how tests passed at the time but they're failing now in the CI. This PR fixes the target value!",
-    "changed_files": 1,
+    "body_excerpt": "In https://github.com/huggingface/transformers/pull/45094 I introduced some errors to the remote code resolution when trying to detect if local code belonged to Transformers or not. These tests were: ```bash pytest tests/models/cohere_asr/\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45123",
-    "created_at": "2026-03-30T15:48:42Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45169",
+    "created_at": "2026-04-01T15:27:43Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45123/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45123",
+    "files_url": "https://github.com/huggingface/transformers/pull/45169/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45169",
     "labels": [],
     "merged": true,
-    "number": 45123,
+    "number": 45169,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix PP test_ocr_queries",
-    "updated_at": "2026-03-30T16:28:38Z"
+    "title": "Fix explicit local code resolution for tokenizers and image processors",
+    "updated_at": "2026-04-01T21:48:02Z"
   },
   {
-    "additions": 106,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "Native model integration should never have anything related to remote code execution: That just means that we would need to add that model natively, not via remote - it can and will not be maintained by us. Additionally added a new linter\u2026",
-    "changed_files": 5,
+    "additions": 4,
+    "author": "w601sxs",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Based on our experimentation min and max lr for LLMs need to be set properly as defaults. Please refer to paper. For the broader community 1e-7 to 1e-4 are decent defaults # What does this PR do? <!-- Congratulations! You've made it this f\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45122",
-    "created_at": "2026-03-30T15:40:30Z",
-    "deletions": 34,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45168",
+    "created_at": "2026-04-01T15:02:15Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45122/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45122",
-    "labels": [
-      "for patch"
-    ],
-    "merged": true,
-    "number": 45122,
+    "files_url": "https://github.com/huggingface/transformers/pull/45168/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45168",
+    "labels": [],
+    "merged": false,
+    "number": 45168,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": ":rotating_light: [`LightGlue`] Remove remote code execution",
-    "updated_at": "2026-03-31T12:11:19Z"
+    "state": "open",
+    "title": "Update min_lr and max_lr default values to better defaults",
+    "updated_at": "2026-04-08T16:58:59Z"
   },
   {
-    "additions": 56,
-    "author": "orbisai0security",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fix critical severity security issue in `src/transformers/cli/serve.py`. ## Vulnerability | Field | Value | |-------|-------| | **ID** | V-007 | | **Severity** | CRITICAL | | **Scanner** | multi_agent_ai | | **Rule** | `V-007` |\u2026",
+    "additions": 16,
+    "author": "xu-song",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for [Anthropic's JSON function style](https://platform.claude.com/docs/en/agents-and-tools/tool-use/define-tools): `{\"name\": \"...\", \"description\": \"...\", \"input_schema\": {...}}` ## Usage Example\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45121",
-    "created_at": "2026-03-30T15:02:06Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45167",
+    "created_at": "2026-04-01T14:50:33Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45121/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45121",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45167/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45167",
+    "labels": [],
     "merged": false,
-    "number": 45121,
+    "number": 45167,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: remove unsafe exec() in serve.py",
-    "updated_at": "2026-03-31T12:09:59Z"
+    "state": "open",
+    "title": "Add anthropic style of function schema",
+    "updated_at": "2026-04-03T05:49:56Z"
   },
   {
-    "additions": 39,
-    "author": "akintunero",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary This PR fixes GitHub issue #45071: \"v5.4.0 breaks PretrainedConfig type checking\". The regression prevents type checkers (mypy, pyright) from validating `PretrainedConfig` subclass instantiation with valid parameters. ## Root Ca\u2026",
+    "additions": 67,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "We didn't think we needed them, but I think we do after all!",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45119",
-    "created_at": "2026-03-30T13:48:47Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45166",
+    "created_at": "2026-04-01T14:46:33Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45119/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45119",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 45119,
+    "files_url": "https://github.com/huggingface/transformers/pull/45166/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45166",
+    "labels": [],
+    "merged": true,
+    "number": 45166,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: Preserve PreTrainedConfig __init__ signatures for type checkers (fixes #45071)",
-    "updated_at": "2026-03-31T12:17:09Z"
+    "title": "Re-add regex substitutions to the response parsing spec",
+    "updated_at": "2026-04-01T15:46:34Z"
   },
   {
-    "additions": 194,
-    "author": "sirzechs66",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds full GGUF loading support for GPT\u2011OSS models (20B/120B). It allows Transformers (and consequently vLLM) to directly load GPT\u2011OSS GGUF files without falling back to a wrong architecture. The changes incl\u2026",
-    "changed_files": 4,
+    "additions": 11,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Emu3 was not updated in recent refactor and blip files were swapped. This PR fixes it Do we need anything to support BC importing from old files, or does it happen in `LazyImports` @yonigozlan ?",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45118",
-    "created_at": "2026-03-30T13:10:36Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45165",
+    "created_at": "2026-04-01T14:18:38Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45118/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45118",
+    "files_url": "https://github.com/huggingface/transformers/pull/45165/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45165",
     "labels": [],
-    "merged": false,
-    "number": 45118,
-    "review_comments_count": 7,
-    "state": "open",
-    "title": "Add full GGUF loading support for GPT\u2011OSS (fixes #43366, supersedes #43757)",
-    "updated_at": "2026-04-09T15:44:59Z"
+    "merged": true,
+    "number": 45165,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix missing image processors backends",
+    "updated_at": "2026-04-07T13:46:24Z"
   },
   {
-    "additions": 3,
-    "author": "Rocketknight1",
+    "additions": 1,
+    "author": "albertvillanova",
     "author_association": "MEMBER",
-    "body_excerpt": "Fixes #45084. We need to resolve the chat template in the Voxtral code to avoid `None` being passed to `_get_template_variables()`! cc @zucchini-nlp, follow-up to #44881",
+    "body_excerpt": "# What does this PR do? Fix TypeError: 'NoneType' object is not iterable in `GenerationMixin.generate` - Fix for None layer_types <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is goin\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45117",
-    "created_at": "2026-03-30T12:34:19Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45164",
+    "created_at": "2026-04-01T13:53:41Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45117/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45117",
+    "files_url": "https://github.com/huggingface/transformers/pull/45164/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45164",
     "labels": [],
     "merged": true,
-    "number": 45117,
+    "number": 45164,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Copy the template resolution logic from the base apply_chat_template to Voxtral",
-    "updated_at": "2026-04-10T14:36:38Z"
+    "title": "Fix TypeError: 'NoneType' object is not iterable in GenerationMixin.generate",
+    "updated_at": "2026-04-03T04:57:18Z"
   },
   {
-    "additions": 222,
-    "author": "sirzechs66",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds full GGUF loading support for GPT\u2011OSS models (20B/120B). It allows Transformers (and consequently vLLM) to directly load GPT\u2011OSS GGUF files without falling back to a wrong architecture. The changes incl\u2026",
-    "changed_files": 5,
+    "additions": 131,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Makes sure the full error is displayed on errors",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45116",
-    "created_at": "2026-03-30T12:18:41Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45116/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45116",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45163",
+    "created_at": "2026-04-01T13:41:18Z",
+    "deletions": 20,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45163/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45163",
     "labels": [],
-    "merged": false,
-    "number": 45116,
+    "merged": true,
+    "number": 45163,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add full GGUF loading support for GPT\u2011OSS (fixes #43366)",
-    "updated_at": "2026-03-30T16:50:24Z"
+    "title": "tweak checkers output on errors",
+    "updated_at": "2026-04-09T11:50:16Z"
   },
   {
-    "additions": 327,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 513,
+    "author": "onwp",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Description Re-creates the Turkish documentation that was accidentally removed during the TF/Flax cleanup (commit fce74651). This PR adds the foundational Turkish docs with the complete \"Get Started\" section. ### Files added - `docs/sou\u2026",
     "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45115",
-    "created_at": "2026-03-30T12:09:31Z",
-    "deletions": 375,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45115/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45115",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45159",
+    "created_at": "2026-04-01T05:15:32Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45159/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45159",
     "labels": [],
     "merged": false,
-    "number": 45115,
+    "number": 45159,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor/nemotron h inherit granitemoehybrid",
-    "updated_at": "2026-03-30T12:23:34Z"
+    "state": "closed",
+    "title": "Add Turkish documentation: Get Started section",
+    "updated_at": "2026-04-01T05:16:49Z"
   },
   {
-    "additions": 16,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This patch fixes all doctests for the run_doctest job",
+    "additions": 525,
+    "author": "onwp",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR re-creates the Turkish (`tr`) documentation for the Transformers library, starting with the **Get Started** section. The original Turkish translation was accidentally removed in commit fce74651 (#40999). This contributio\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45158",
+    "created_at": "2026-04-01T05:05:40Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45158/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45158",
+    "labels": [],
+    "merged": true,
+    "number": 45158,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add Turkish (tr) translation for Get Started section",
+    "updated_at": "2026-04-02T17:50:46Z"
+  },
+  {
+    "additions": 190,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Add PrismML 1bit (gguf based, group-size 128) model inference suppport. ## Code Agent Policy - [ ] I confirm that this is not a pure code agent PR. ## Before submitting - [ ] This PR fixes a typo or improves the doc\u2026",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45114",
-    "created_at": "2026-03-30T11:08:02Z",
-    "deletions": 10,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45114/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45114",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45157",
+    "created_at": "2026-04-01T03:11:51Z",
+    "deletions": 23,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45157/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45157",
     "labels": [],
     "merged": false,
-    "number": 45114,
+    "number": 45157,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix: lets fix all doctests",
-    "updated_at": "2026-03-30T11:30:04Z"
+    "title": "[WIP] PrismML Bonsai model support",
+    "updated_at": "2026-04-02T20:53:12Z"
   },
   {
-    "additions": 181,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds GPU Direct Storage (GDS) support for safetensors model loading via `torch.cuda.gds.GdsFile`. GDS is disabled by default, `HF_ENABLE_GDS=1` env is used to enable it. ## Benchmark A100 PCIe 40GB, Samsung\u2026",
-    "changed_files": 3,
+    "additions": 50,
+    "author": "Cursx",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? After the `merge_and_unload()` operation in PEFT, embed_tokens and lm_head become independent tensors with different values, but config.tie_word_embeddings remains True. The load-side already detects this using torc\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45113",
-    "created_at": "2026-03-30T10:55:35Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45113/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45113",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45156",
+    "created_at": "2026-04-01T02:36:38Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45156/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45156",
     "labels": [],
     "merged": false,
-    "number": 45113,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Add GDS support for safetensors loading ",
-    "updated_at": "2026-03-31T15:17:16Z"
+    "number": 45156,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix save_pretrained writing incorrect tie_word_embeddings=True config after PEFT merge",
+    "updated_at": "2026-04-01T09:34:05Z"
   },
   {
-    "additions": 233,
-    "author": "remi-or",
+    "additions": 46,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR adds a warmup phase before generation starts, turned on by default. It allows for better diagnostics and a more representative user experience than without warmup, where the cost of wamup is payed during the first request rather th\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? Add support for loading adapters with `PreTrainedModel.load_adapter` when using TP. See: https://github.com/huggingface/peft/pull/3096",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45112",
-    "created_at": "2026-03-30T10:43:32Z",
-    "deletions": 46,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45155",
+    "created_at": "2026-03-31T22:23:06Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45112/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45112",
+    "files_url": "https://github.com/huggingface/transformers/pull/45155/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45155",
     "labels": [],
     "merged": true,
-    "number": 45112,
-    "review_comments_count": 5,
+    "number": 45155,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[CB] Add warmup feature",
-    "updated_at": "2026-03-31T07:42:18Z"
+    "title": "Load adapter with TP",
+    "updated_at": "2026-04-09T22:35:45Z"
   },
   {
-    "additions": 42,
-    "author": "ionut-anghelina",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Several MoE routers applied softmax inside `forward()` but returned the result as `router_logits`. The `load_balancing_loss_func` then applied softmax **again**, computing the aux loss on `softmax(softmax(logits))` which flatt\u2026",
-    "changed_files": 13,
+    "additions": 676,
+    "author": "FaizanImran-blip",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixed bug #45072 / #45071 where PretrainedConfig type checking and Pydantic model field validation were broken in v5.4.0. Added proper type checking for 'num_labels'. Added unit tests in test.py to verify correct and incorrect types. Verif\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45111",
-    "created_at": "2026-03-30T08:23:07Z",
-    "deletions": 42,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45154",
+    "created_at": "2026-03-31T20:28:32Z",
+    "deletions": 153,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45111/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45111",
+    "files_url": "https://github.com/huggingface/transformers/pull/45154/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45154",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 45111,
+    "number": 45154,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix double softmax in MoE router load-balancing loss",
-    "updated_at": "2026-03-30T14:09:10Z"
+    "title": "Pretrained-config bug(45072/huggingfacebug)",
+    "updated_at": "2026-04-09T11:43:55Z"
   },
   {
-    "additions": 7035,
-    "author": "NielsRogge",
+    "additions": 502,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? [disclaimer: PR was entirely written by Codex where I just nudge it in the right directions, similar to #44285] ### Feature request I'd like to add support for Meta's [SAM 3.1](https://huggingface.co/facebook/sam3.1\u2026",
-    "changed_files": 20,
+    "body_excerpt": "As per title, with torch releasing the varlen API, we can somewhat use native FA (with limited feature support) Restrictions - Unsupported features - Dropout - Learnable sinks (attention sinks) - Determinism - Softcap - CB KV cache native\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45110",
-    "created_at": "2026-03-30T08:19:42Z",
-    "deletions": 29,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45110/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45110",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45153",
+    "created_at": "2026-03-31T19:43:19Z",
+    "deletions": 386,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45153/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45153",
     "labels": [],
     "merged": false,
-    "number": 45110,
-    "review_comments_count": 3,
+    "number": 45153,
+    "review_comments_count": 25,
     "state": "open",
-    "title": "Add SAM 3.1",
-    "updated_at": "2026-03-30T12:33:41Z"
+    "title": "[`FA`] Native torch integration",
+    "updated_at": "2026-04-01T20:01:16Z"
   },
   {
-    "additions": 42,
-    "author": "aws-zhanxun",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? `T5Attention.forward` hard-codes `n_heads` and `inner_dim` in `view()` calls. When using PyTorch Tensor Parallelism, `ColwiseParallel` shards the q/k/v projection output dim from `inner_dim` to `inner_dim / tp_size`\u2026",
-    "changed_files": 6,
+    "additions": 389,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "refactors the testing docs to be more contributor-facing organized around writing model tests instead of being a collection of pytest usage examples and CI maintenance. also updates the pr checks doc so contributors are better prepared to\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45109",
-    "created_at": "2026-03-30T07:06:19Z",
-    "deletions": 48,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45152",
+    "created_at": "2026-03-31T18:12:09Z",
+    "deletions": 1363,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45109/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45109",
+    "files_url": "https://github.com/huggingface/transformers/pull/45152/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45152",
     "labels": [],
-    "merged": true,
-    "number": 45109,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "Fix T5Attention shape mismatch under Tensor Parallelism",
-    "updated_at": "2026-04-01T16:21:32Z"
+    "merged": false,
+    "number": 45152,
+    "review_comments_count": 6,
+    "state": "open",
+    "title": "[docs] model testing",
+    "updated_at": "2026-04-13T07:09:25Z"
   },
   {
     "additions": 1,
-    "author": "jiqing-feng",
+    "author": "maanas1234",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Description Some wav2vec2 models (e.g. audio classification variants) have `vocab_size: null` in their `config.json`. The current type annotation `vocab_size: int = 32` causes `huggingface_hub`'s strict dataclass validation to reject `N\u2026",
+    "body_excerpt": "# What does this PR do? The previous code snippet had an error in how `TrainingArguments` was being used, which could lead to confusion or incorrect implementation. Corrected the example to properly demonstrate how to initialize and use `T\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45108",
-    "created_at": "2026-03-30T03:06:01Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45150",
+    "created_at": "2026-03-31T17:31:37Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45108/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45108",
+    "files_url": "https://github.com/huggingface/transformers/pull/45150/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45150",
     "labels": [],
     "merged": true,
-    "number": 45108,
-    "review_comments_count": 0,
+    "number": 45150,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix `Wav2Vec2Config.vocab_size` type to allow `None`",
-    "updated_at": "2026-04-09T14:56:15Z"
+    "title": "Fix incorrect TrainingArguments example in training.md",
+    "updated_at": "2026-03-31T18:31:25Z"
   },
   {
-    "additions": 1,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Description The `text-to-speech` pipeline crashes when loading models whose `generation_config` contains fields set to `None` (e.g. `use_cache=None`). This is because `TextToAudioPipeline.__init__` blindly copies all generation config v\u2026",
-    "changed_files": 1,
+    "additions": 4633,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "First pass: ~1M+ tokens in, ~115K+ out, Opus mainly, $42, 1h30 # PR #44320 vs Our Implementation ## What we got right - Same overall structure: modular file + generated standalone files + conversion script + tests + docs - Same model direc\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45107",
-    "created_at": "2026-03-30T02:32:13Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45149",
+    "created_at": "2026-03-31T15:56:11Z",
     "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45107/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45107",
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45149/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45149",
     "labels": [],
-    "merged": true,
-    "number": 45107,
+    "merged": false,
+    "number": 45149,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix `text-to-speech` pipeline crash when generation config contains `None` values",
-    "updated_at": "2026-04-08T17:13:05Z"
+    "state": "open",
+    "title": "DO NOT MERGE adding SAML3-LiteText with a skill, first pass",
+    "updated_at": "2026-04-01T06:28:20Z"
   },
   {
-    "additions": 72,
-    "author": "rpathade",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes `_process_kwargs_parameters` crashing with `AttributeError` when `@auto_docstring` is applied in a module that uses `from __future__ import annotations`. Fixes #45103 ## Root cause `from __future__ import anno\u2026",
+    "additions": 13,
+    "author": "HallerPatrick",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #45146 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review and r\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45105",
-    "created_at": "2026-03-29T23:37:13Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45148",
+    "created_at": "2026-03-31T15:20:02Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45105/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45105",
+    "files_url": "https://github.com/huggingface/transformers/pull/45148/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45148",
     "labels": [],
     "merged": false,
-    "number": 45105,
+    "number": 45148,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix @auto_docstring crash with from __future__ import annotations in _process_kwargs_parameters",
-    "updated_at": "2026-03-30T00:20:41Z"
+    "state": "closed",
+    "title": "Allow for all layers in Qwen3.5 architecture to be Gated Deltanet.",
+    "updated_at": "2026-04-02T11:18:12Z"
   },
   {
-    "additions": 13,
-    "author": "hkc5",
-    "author_association": "NONE",
-    "body_excerpt": "## Description Fixes #45103 The `@auto_docstring` decorator crashes at import time when applied to a class in a module that uses `from __future__ import annotations`. This is because `from __future__ import annotations` makes all annotatio\u2026",
-    "changed_files": 1,
+    "additions": 309,
+    "author": "mobicham",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR fixes hqq support that has been broken for a couple of months now after a refactoring: * Online quantization works fine now. * Serialization to load/save HQQ models is fixed too. ## Code Agent Policy - [x] I\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45104",
-    "created_at": "2026-03-29T23:26:49Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45147",
+    "created_at": "2026-03-31T14:59:47Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45104/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45104",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45147/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45147",
+    "labels": [],
     "merged": false,
-    "number": 45104,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix auto_docstring crash with from __future__ import annotations",
-    "updated_at": "2026-03-30T12:13:20Z"
+    "number": 45147,
+    "review_comments_count": 8,
+    "state": "open",
+    "title": "Fix broken HQQ support",
+    "updated_at": "2026-04-09T16:14:06Z"
   },
   {
-    "additions": 1664,
-    "author": "HemanthSai7",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Co-authored-by: Vishesht27 This PR adds support for codes for the upcoming Nandi series models. We also appreciate the valuable feedback and thorough review provided by @vasqu and @ArthurZucker \ud83e\udd17\ud83d\ude4f",
-    "changed_files": 11,
+    "additions": 2151,
+    "author": "casinca",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Hello, this PR aims to add the MiMo-V2-Flash model to the Transformers library Fixes https://github.com/huggingface/transformers/issues/42954 MiMo-V2 is \"The last of the SOTAs\" that isn't natively supported by the T\u2026",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45101",
-    "created_at": "2026-03-29T20:35:56Z",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45144",
+    "created_at": "2026-03-31T13:53:28Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45101/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45101",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45144/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45144",
+    "labels": [
+      "New model"
+    ],
     "merged": false,
-    "number": 45101,
-    "review_comments_count": 0,
+    "number": 45144,
+    "review_comments_count": 30,
     "state": "open",
-    "title": "Adding support for Nandi Models",
-    "updated_at": "2026-04-05T09:53:35Z"
+    "title": "Add Xiaomi MiMo-V2",
+    "updated_at": "2026-04-14T14:26:53Z"
   },
   {
-    "additions": 1,
-    "author": "code-runner77",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "Improve wording in accelerator selection documentation # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you\u2026",
-    "changed_files": 1,
+    "additions": 102,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "This PR adds `parse_response` to Processor classes by wrapping the `Tokenizer` method! cc @zucchini-nlp",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45100",
-    "created_at": "2026-03-29T17:28:59Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45143",
+    "created_at": "2026-03-31T13:11:49Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45100/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45100",
+    "files_url": "https://github.com/huggingface/transformers/pull/45143/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45143",
     "labels": [],
-    "merged": false,
-    "number": 45100,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 45143,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Update accelerator_selection.md",
-    "updated_at": "2026-03-30T13:40:51Z"
+    "title": "Add parse_response to Processor, make it a bit more official",
+    "updated_at": "2026-03-31T17:07:16Z"
   },
   {
-    "additions": 2,
-    "author": "zendy199x",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review an\u2026",
-    "changed_files": 1,
+    "additions": 4,
+    "author": "casinca",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45098",
-    "created_at": "2026-03-29T15:50:04Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45142",
+    "created_at": "2026-03-31T12:47:46Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45098/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45098",
+    "files_url": "https://github.com/huggingface/transformers/pull/45142/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45142",
     "labels": [],
     "merged": false,
-    "number": 45098,
+    "number": 45142,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: incomplete string literal causes syntax error in config docstring checker",
-    "updated_at": "2026-03-30T13:27:17Z"
+    "title": "refactor(gpt-oss): rename `eager_attention_forward` to `eager_attention_forward_with_sink`",
+    "updated_at": "2026-04-02T16:44:14Z"
   },
   {
-    "additions": 771,
-    "author": "baonudesifeizhai",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR extends the InternVL conversion script to support the old `OpenGVLab/InternVL2-1B` and `OpenGVLab/InternVL2-2B` checkpoints. These checkpoints currently rely on remote code and are problematic for downstream\u2026",
-    "changed_files": 14,
+    "additions": 19,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Really stupid ... Currently, change a docstring will count as \"modified files\", then the impacted files and test files to run are computed. Say, we add a comment to bert, many files impacted (via dependency) then ma\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45097",
-    "created_at": "2026-03-29T05:29:42Z",
-    "deletions": 30,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45140",
+    "created_at": "2026-03-31T09:41:28Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45097/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45097",
+    "files_url": "https://github.com/huggingface/transformers/pull/45140/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45140",
     "labels": [],
-    "merged": false,
-    "number": 45097,
+    "merged": true,
+    "number": 45140,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add old InternVL2-1B/2B support to the InternVL conversion script #45092",
-    "updated_at": "2026-04-02T07:59:13Z"
+    "state": "closed",
+    "title": "Fix stupid test fetcher",
+    "updated_at": "2026-03-31T11:06:51Z"
   },
   {
-    "additions": 5,
-    "author": "hkc5",
-    "author_association": "NONE",
-    "body_excerpt": "## Problem Old remote-code checkpoints (like InternVL2) perform real-tensor operations during model construction (e.g., calling `.item()` on tensors). This causes `RuntimeError: Tensor.item() cannot be called on meta tensors` when models a\u2026",
-    "changed_files": 1,
+    "additions": 757,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? More fixes",
+    "changed_files": 99,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45096",
-    "created_at": "2026-03-29T05:13:12Z",
-    "deletions": 1,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45139",
+    "created_at": "2026-03-31T09:26:28Z",
+    "deletions": 976,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45096/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45096",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 45096,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/45139/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45139",
+    "labels": [],
+    "merged": true,
+    "number": 45139,
+    "review_comments_count": 26,
     "state": "closed",
-    "title": "Fix: Skip meta device initialization for remote code models",
-    "updated_at": "2026-03-30T12:13:08Z"
+    "title": "Fix vllm cis",
+    "updated_at": "2026-04-08T11:19:41Z"
   },
   {
-    "additions": 100,
-    "author": "HanFa",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "When a config class has been explicitly registered via AutoConfig.register(), it should take precedence over auto_map remote code. Previously, `trust_remote_code=True` with auto_map.AutoConfig in config.json would always load remote code,\u2026",
-    "changed_files": 14,
+    "additions": 2,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "This is a small T5 expectations update. It is the same for both AMD and NVIDIA A10 GPUs.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45094",
-    "created_at": "2026-03-29T04:21:29Z",
-    "deletions": 17,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45138",
+    "created_at": "2026-03-31T08:41:40Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45094/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45094",
+    "files_url": "https://github.com/huggingface/transformers/pull/45138/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45138",
     "labels": [],
     "merged": true,
-    "number": 45094,
+    "number": 45138,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: prefer registered config over remote code in AutoConfig.from_pretrained",
-    "updated_at": "2026-03-31T14:56:49Z"
+    "title": "CI] Small T5 expectations updated",
+    "updated_at": "2026-04-02T08:21:25Z"
   },
   {
-    "additions": 6,
-    "author": "hkc5",
+    "additions": 83,
+    "author": "Cursx",
     "author_association": "NONE",
-    "body_excerpt": "This PR fixes the unexpected behaviour of helper function `_get_feat_extract_output_lengths` in qwen3_omni_moe as reported in #45083. ## Problem The current implementation incorrectly calculates the output length of the convolutional layer\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? This PR fixes a bug in `PreTrainedModel.save_pretrained()` where `config.tie_word_embeddings` can be inconsistent with the actual weight state, leading to silent model corruption for downstream consumers. ### Proble\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45091",
-    "created_at": "2026-03-29T00:37:06Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45136",
+    "created_at": "2026-03-31T06:45:53Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45091/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45091",
+    "files_url": "https://github.com/huggingface/transformers/pull/45136/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45136",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 45091,
+    "number": 45136,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix _get_feat_extract_output_lengths in qwen3_omni_moe",
-    "updated_at": "2026-03-30T12:12:57Z"
+    "title": "Fix #45127: Auto-fix diverged tie_word_embeddings config on save to prevent silent weight corruption",
+    "updated_at": "2026-03-31T12:21:32Z"
   },
   {
-    "additions": 17,
-    "author": "hkc5",
+    "additions": 44,
+    "author": "Cursx",
     "author_association": "NONE",
-    "body_excerpt": "## Description Fixes #45084 The `VoxtralProcessor.apply_chat_template` method was calling `_get_template_variables(chat_template)` without first checking if `chat_template` was None. This caused a `TypeError: Can't compile non template nod\u2026",
+    "body_excerpt": "# What does this PR do? Fixes an issue where PEFT adapters applied independently to tied embeddings (`embed_tokens` and [lm_head](cci:1://file:///d:/transformers/transformers/src/transformers/modeling_utils.py:2858:4-2985:26)) cause silent\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45090",
-    "created_at": "2026-03-29T00:35:00Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45135",
+    "created_at": "2026-03-31T02:28:26Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45090/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45090",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45135/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45135",
+    "labels": [],
     "merged": false,
-    "number": 45090,
+    "number": 45135,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError when chat_template is None in VoxtralProcessor",
-    "updated_at": "2026-03-30T12:10:25Z"
+    "title": "Fix model saving corruption for dynamically untied embeddings",
+    "updated_at": "2026-03-31T04:36:56Z"
   },
   {
-    "additions": 4,
-    "author": "Krishnachaitanyakc",
+    "additions": 91,
+    "author": "milesial",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #45003 `_can_set_attn_implementation` and `_can_set_experts_implementation` in `PreTrainedModel` use `sys.modules[cls.__module__]`, which raises `KeyError` when a module has been removed from `sys.modules` at runtime. This\u2026",
+    "body_excerpt": "# What does this PR do? Add support for CUDA parakeet preprocessor, running STFT and mel spectrogram extraction on the GPU. This refactor also speeds up the CPU implementation. Tested on `nvidia/parakeet-ctc-0.6b`, B200, 300s audio: Before\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45089",
-    "created_at": "2026-03-28T16:44:06Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45089/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45089",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 45089,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: use sys.modules.get() to avoid KeyError in modeling_utils",
-    "updated_at": "2026-03-30T14:19:30Z"
-  },
-  {
-    "additions": 6,
-    "author": "knQzx",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "corrects the conv output length calculation in _get_feat_extract_output_lengths which was computing wrong values for the audio encoder. fixes #45083",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45088",
-    "created_at": "2026-03-28T16:40:07Z",
-    "deletions": 9,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45134",
+    "created_at": "2026-03-31T01:59:28Z",
+    "deletions": 56,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45088/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45088",
+    "files_url": "https://github.com/huggingface/transformers/pull/45134/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45134",
     "labels": [],
     "merged": false,
-    "number": 45088,
+    "number": 45134,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix audio encoder output length formula in qwen3_omni_moe",
-    "updated_at": "2026-03-30T12:53:42Z"
+    "state": "open",
+    "title": "Optimize Parakeet feature extraction on CUDA",
+    "updated_at": "2026-04-07T14:56:51Z"
   },
   {
-    "additions": 5,
-    "author": "hkc5",
-    "author_association": "NONE",
-    "body_excerpt": "This PR fixes issue #45071 where mypy type checking was broken for PretrainedConfig subclasses. ## Problem In transformers v5.4.0, the PretrainedConfig class was converted to a dataclass with a wrapper around __init__ to accept arbitrary k\u2026",
-    "changed_files": 1,
+    "additions": 1944,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written via HF Inference API guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45087",
-    "created_at": "2026-03-28T16:38:11Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45133",
+    "created_at": "2026-03-31T00:00:15Z",
     "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45087/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45087",
-    "labels": [
-      "Code agent slop"
-    ],
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45133/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45133",
+    "labels": [],
     "merged": false,
-    "number": 45087,
+    "number": 45133,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix PretrainedConfig type checking with mypy",
-    "updated_at": "2026-03-30T12:12:48Z"
+    "state": "open",
+    "title": "Add sarvam model",
+    "updated_at": "2026-03-31T00:09:59Z"
   },
   {
-    "additions": 3,
-    "author": "knQzx",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "the function accesses backend_tokenizer.pre_tokenizer but the tokenizer passed is already the raw rust object, so it should be pre_tokenizer directly. fixes #45081",
-    "changed_files": 1,
+    "additions": 215,
+    "author": "akintunero",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR fixes GitHub issue #45120: \"Double softmax in MoE router load-balancing loss\". MoE routers in Mixtral, Qwen2MoE, and Qwen3VLMoE were applying softmax inside forward(), then the load_balancing_loss_func applied softmax AG\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45086",
-    "created_at": "2026-03-28T16:37:49Z",
-    "deletions": 3,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45132",
+    "created_at": "2026-03-30T22:45:01Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45086/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45086",
+    "files_url": "https://github.com/huggingface/transformers/pull/45132/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45132",
     "labels": [],
     "merged": false,
-    "number": 45086,
+    "number": 45132,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix AttributeError in _patch_mistral_regex",
-    "updated_at": "2026-03-28T16:37:49Z"
+    "state": "closed",
+    "title": "Fix: Remove double softmax in MoE router load-balancing loss (Mixtral, Qwen2MoE, Qwen3VLMoE)",
+    "updated_at": "2026-03-30T23:36:57Z"
   },
   {
-    "additions": 7,
-    "author": "hkc5",
-    "author_association": "NONE",
-    "body_excerpt": "This PR fixes #45072. ## Changes ### SwitchTransformers - Fixed a bug in `SwitchTransformersTop1Router.forward()` where `router_logits` was being reassigned to the max probability values instead of keeping the raw logits from the classifie\u2026",
-    "changed_files": 3,
+    "additions": 30,
+    "author": "yacinemebarki",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes issue #45120: Several MoE routers returned softmaxed probabilities as `router_logits`, which caused `load_balancing_loss_func` to compute softmax(softmax(logits)), flattening routing distributions and weakenin\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45085",
-    "created_at": "2026-03-28T16:28:27Z",
-    "deletions": 7,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45131",
+    "created_at": "2026-03-30T21:18:47Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45085/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45085",
+    "files_url": "https://github.com/huggingface/transformers/pull/45131/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45131",
     "labels": [],
-    "merged": false,
-    "number": 45085,
+    "merged": true,
+    "number": 45131,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix dtype mismatches in SwitchTransformers and TimmWrapperModel for bfloat16",
-    "updated_at": "2026-03-30T11:25:14Z"
+    "title": "Fix MoE routers returning probabilities instead of logits",
+    "updated_at": "2026-04-13T09:53:56Z"
   },
   {
-    "additions": 143,
-    "author": "NielsRogge",
+    "additions": 120,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR updates the conversion script of VidEoMT to convert all remaining checkpoints. Find them here: https://huggingface.co/papers/2602.17807",
+    "body_excerpt": "updates the `@auto_docstring` docs as part of the process of improving the model addition docs - updates title to be more precise as it can mean documenting a model in a `model.md` file as well - flat hierarchy before \u2192 each component-type\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45082",
-    "created_at": "2026-03-28T14:07:12Z",
-    "deletions": 14,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45130",
+    "created_at": "2026-03-30T20:14:33Z",
+    "deletions": 152,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45082/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45082",
+    "files_url": "https://github.com/huggingface/transformers/pull/45130/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45130",
     "labels": [],
     "merged": false,
-    "number": 45082,
-    "review_comments_count": 0,
+    "number": 45130,
+    "review_comments_count": 2,
     "state": "open",
-    "title": "[VidEoMT] Update conversion script",
-    "updated_at": "2026-03-28T14:16:54Z"
+    "title": "[docs] @auto_docstring decorator",
+    "updated_at": "2026-04-09T01:16:52Z"
   },
   {
-    "additions": 13,
-    "author": "joaquinhuigomez",
+    "additions": 5,
+    "author": "IgnazioDS",
     "author_association": "NONE",
-    "body_excerpt": "## Root cause The v5.4.0 release converted `PreTrainedConfig` from a regular class to a `@dataclass`. This changes how Pydantic handles it when used as a field type in a `BaseModel`: instead of treating it as an opaque arbitrary type, Pyda\u2026",
+    "body_excerpt": "## Problem Fixes #45070. `PreTrainedConfig.dtype` was annotated as `Union[str, \"torch.dtype\"] | None`. Since `torch` is only imported under `TYPE_CHECKING`, pydantic's schema builder encounters the `\"torch.dtype\"` forward reference at runt\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45080",
-    "created_at": "2026-03-28T12:13:57Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45129",
+    "created_at": "2026-03-30T19:13:52Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45080/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45080",
-    "labels": [],
-    "merged": false,
-    "number": 45080,
+    "files_url": "https://github.com/huggingface/transformers/pull/45129/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45129",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 45129,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix PreTrainedConfig as Pydantic field type after dataclass conversion",
-    "updated_at": "2026-03-29T19:36:24Z"
+    "title": "fix(config): annotate PreTrainedConfig.dtype as Any to fix pydantic schema generation (#45070)",
+    "updated_at": "2026-03-31T12:33:39Z"
   },
   {
-    "additions": 50,
-    "author": "javierdejesusda",
+    "additions": 443,
+    "author": "FaizanImran-blip",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR fixes a crash in `_process_kwargs_parameters` that occurred when a module uses `from __future__ import annotations`. - Prevents AttributeError crash. - Adds a test `test_future_annotations.py` to verify the fix works. No other func\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45128",
+    "created_at": "2026-03-30T19:10:48Z",
+    "deletions": 95,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45128/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45128",
+    "labels": [],
+    "merged": false,
+    "number": 45128,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix: handle future annotations in _process_kwargs_parameters",
+    "updated_at": "2026-03-31T10:44:40Z"
+  },
+  {
+    "additions": 186,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This patch adds exponential back-off for `load_audio_librosa` / `load_audio_as` / `load_audio_torchcodec` when they try to download a file. That generic utility is also used within `hub_retry` notice that there were\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45126",
+    "created_at": "2026-03-30T16:34:40Z",
+    "deletions": 38,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45126/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45126",
+    "labels": [],
+    "merged": true,
+    "number": 45126,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "http retries on audio file downloads",
+    "updated_at": "2026-04-09T15:12:40Z"
+  },
+  {
+    "additions": 3,
+    "author": "danielquintas8",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #35141 When `tie_word_embeddings=False`, calling `resize_token_embeddings()` then `post_init()` overwrites the LM head weights with random values. This happens because `_get_resized_lm_head()` returns a new `\u2026",
+    "body_excerpt": "# What does this PR do? Adds `_tp_plan = {\"lm_head\": \"colwise_gather_output\"}` to `Qwen3_5MoeForConditionalGeneration` (the VL wrapper class). The text-only `Qwen3_5MoeForCausalLM` already had `_tp_plan`, but the VL variant was missing it.\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45079",
-    "created_at": "2026-03-28T00:06:03Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45124",
+    "created_at": "2026-03-30T16:23:11Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45079/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45079",
+    "files_url": "https://github.com/huggingface/transformers/pull/45124/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45124",
     "labels": [],
     "merged": true,
-    "number": 45079,
+    "number": 45124,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix resized LM head weights being overwritten by post_init",
-    "updated_at": "2026-04-02T14:13:31Z"
+    "title": "[Qwen3.5 MoE] Add _tp_plan to ForConditionalGeneration",
+    "updated_at": "2026-04-02T14:10:01Z"
   },
   {
-    "additions": 33,
-    "author": "itazap",
+    "additions": 1,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "fixes fallback https://github.com/huggingface/transformers/issues/44993",
-    "changed_files": 2,
+    "body_excerpt": "The `test_ocr_queries` assertion value was wrong, even at the initial commit! I'm not sure how tests passed at the time but they're failing now in the CI. This PR fixes the target value!",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45078",
-    "created_at": "2026-03-27T23:06:36Z",
-    "deletions": 9,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45123",
+    "created_at": "2026-03-30T15:48:42Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45078/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45078",
+    "files_url": "https://github.com/huggingface/transformers/pull/45123/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45123",
     "labels": [],
-    "merged": false,
-    "number": 45078,
-    "review_comments_count": 5,
-    "state": "open",
-    "title": "throw error when conversion required",
-    "updated_at": "2026-04-13T09:09:31Z"
+    "merged": true,
+    "number": 45123,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix PP test_ocr_queries",
+    "updated_at": "2026-03-30T16:28:38Z"
   },
   {
-    "additions": 312,
-    "author": "dagecko",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Re-submission of #45010. Had a problem with my fork and had to delete it, which closed the original PR. Apologies for the noise. @tarekziade @ydshieh I noticed you fixed the critical findings from the original PR, which is great. This resu\u2026",
-    "changed_files": 22,
+    "additions": 106,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Native model integration should never have anything related to remote code execution: That just means that we would need to add that model natively, not via remote - it can and will not be maintained by us. Additionally added a new linter\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45077",
-    "created_at": "2026-03-27T22:20:56Z",
-    "deletions": 312,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45122",
+    "created_at": "2026-03-30T15:40:30Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45077/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45077",
-    "labels": [],
-    "merged": false,
-    "number": 45077,
+    "files_url": "https://github.com/huggingface/transformers/pull/45122/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45122",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 45122,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix: pin 50 unpinned actions to commit SHA, extract 1 secret to env var",
-    "updated_at": "2026-03-27T22:20:56Z"
+    "state": "closed",
+    "title": ":rotating_light: [`LightGlue`] Remove remote code execution",
+    "updated_at": "2026-03-31T12:11:19Z"
   },
   {
-    "additions": 376,
-    "author": "osman-akkawi",
+    "additions": 56,
+    "author": "orbisai0security",
     "author_association": "NONE",
-    "body_excerpt": "As Osman Akkawi, I am proud to submit this comprehensive Pull Request which introduces two world-first, unique innovations to the transformers library alongside essential codebase maintenance. This PR transforms how users interact with and\u2026",
-    "changed_files": 8,
+    "body_excerpt": "## Summary Fix critical severity security issue in `src/transformers/cli/serve.py`. ## Vulnerability | Field | Value | |-------|-------| | **ID** | V-007 | | **Severity** | CRITICAL | | **Scanner** | multi_agent_ai | | **Rule** | `V-007` |\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45076",
-    "created_at": "2026-03-27T20:30:37Z",
-    "deletions": 9,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45121",
+    "created_at": "2026-03-30T15:02:06Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45076/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45076",
+    "files_url": "https://github.com/huggingface/transformers/pull/45121/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45121",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 45076,
+    "number": 45121,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Osman-Level Innovations: Hardware-Aware Advisor & Selective Weight Surgery CLI",
-    "updated_at": "2026-04-06T10:49:18Z"
+    "title": "fix: remove unsafe exec() in serve.py",
+    "updated_at": "2026-03-31T12:09:59Z"
   },
   {
-    "additions": 5184,
-    "author": "thisisiron",
+    "additions": 39,
+    "author": "akintunero",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds the **DeepSeek-OCR2** model. ### Reference - Arxiv Paper: [DeepSeek-OCR 2: Visual Causal Flow](https://arxiv.org/abs/2601.20552) - Huggingface hub: [deepseek-ai/DeepSeek-OCR-2](https://huggingface.co/deepseek-a\u2026",
-    "changed_files": 21,
+    "body_excerpt": "## Summary This PR fixes GitHub issue #45071: \"v5.4.0 breaks PretrainedConfig type checking\". The regression prevents type checkers (mypy, pyright) from validating `PretrainedConfig` subclass instantiation with valid parameters. ## Root Ca\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45075",
-    "created_at": "2026-03-27T20:14:27Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45119",
+    "created_at": "2026-03-30T13:48:47Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45075/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45075",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45119/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45119",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 45075,
-    "review_comments_count": 44,
-    "state": "open",
-    "title": "Add Deepseek-OCR-2 model",
-    "updated_at": "2026-04-12T16:07:45Z"
+    "number": 45119,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: Preserve PreTrainedConfig __init__ signatures for type checkers (fixes #45071)",
+    "updated_at": "2026-03-31T12:17:09Z"
   },
   {
-    "additions": 12,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following dtype mismatch use cases were identified and fixed in this PR: \u2192 **Switch Transformers:** [7938e91fa](https://github.com/harshaljanjani/transformers/commit/7938e91faabb051f3a001cd39c173d4697c2d81c) r\u2026",
+    "additions": 194,
+    "author": "sirzechs66",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds full GGUF loading support for GPT\u2011OSS models (20B/120B). It allows Transformers (and consequently vLLM) to directly load GPT\u2011OSS GGUF files without falling back to a wrong architecture. The changes incl\u2026",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45074",
-    "created_at": "2026-03-27T20:02:28Z",
-    "deletions": 1,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45118",
+    "created_at": "2026-03-30T13:10:36Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45074/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45074",
+    "files_url": "https://github.com/huggingface/transformers/pull/45118/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45118",
     "labels": [],
-    "merged": true,
-    "number": 45074,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "fix(models): Fix dtype mismatch in SwitchTransformers and TimmWrapperModel",
-    "updated_at": "2026-04-02T13:59:46Z"
+    "merged": false,
+    "number": 45118,
+    "review_comments_count": 7,
+    "state": "open",
+    "title": "Add full GGUF loading support for GPT\u2011OSS (fixes #43366, supersedes #43757)",
+    "updated_at": "2026-04-09T15:44:59Z"
   },
   {
-    "additions": 1239,
-    "author": "Aravind-11",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "What does this pr do? - Add modular_owlvit.py inheriting CLIP vision/text embeddings, MLP, encoder layer, encoder - Import box IoU helpers from loss_for_object_detection; eager_attention from BERT - Regenerate modeling_owlvit.py via modula\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes #45084. We need to resolve the chat template in the Voxtral code to avoid `None` being passed to `_get_template_variables()`! cc @zucchini-nlp, follow-up to #44881",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45073",
-    "created_at": "2026-03-27T20:00:41Z",
-    "deletions": 135,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45117",
+    "created_at": "2026-03-30T12:34:19Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45073/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45073",
+    "files_url": "https://github.com/huggingface/transformers/pull/45117/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45117",
     "labels": [],
-    "merged": false,
-    "number": 45073,
+    "merged": true,
+    "number": 45117,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor OwlViT to modular Transformers",
-    "updated_at": "2026-04-09T15:42:31Z"
+    "state": "closed",
+    "title": "Copy the template resolution logic from the base apply_chat_template to Voxtral",
+    "updated_at": "2026-04-10T14:36:38Z"
   },
   {
-    "additions": 1,
-    "author": "Fr0do",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a `TypeError` in `_check_received_keys` (line 919 of `modeling_rope_utils.py`) where `received_keys -= ignore_keys` fails when `ignore_keys` is a `list` instead of a `set`. ## Root cause Model configs (Qwen3.\u2026",
-    "changed_files": 1,
+    "additions": 222,
+    "author": "sirzechs66",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds full GGUF loading support for GPT\u2011OSS models (20B/120B). It allows Transformers (and consequently vLLM) to directly load GPT\u2011OSS GGUF files without falling back to a wrong architecture. The changes incl\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45069",
-    "created_at": "2026-03-27T19:21:01Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45069/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45069",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45116",
+    "created_at": "2026-03-30T12:18:41Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45116/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45116",
     "labels": [],
-    "merged": true,
-    "number": 45069,
+    "merged": false,
+    "number": 45116,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError in rope validation when ignore_keys is a list",
-    "updated_at": "2026-03-30T11:41:12Z"
+    "title": "Add full GGUF loading support for GPT\u2011OSS (fixes #43366)",
+    "updated_at": "2026-03-30T16:50:24Z"
   },
   {
-    "additions": 22,
-    "author": "aarushisingh04",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### what does this PR do? this pr enables `trainer.train(resume_from_checkpoint=...)` to accept hugging face hub repository ids. instead of only local paths, users can now pass `user/repo@revision` and the trainer will automatically downlo\u2026",
-    "changed_files": 1,
+    "additions": 327,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45067",
-    "created_at": "2026-03-27T18:26:03Z",
-    "deletions": 3,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45067/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45067",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45115",
+    "created_at": "2026-03-30T12:09:31Z",
+    "deletions": 375,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45115/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45115",
     "labels": [],
     "merged": false,
-    "number": 45067,
+    "number": 45115,
     "review_comments_count": 0,
     "state": "open",
-    "title": "feat: trainer resume_from_checkpoint support hub downloads (#43375)",
-    "updated_at": "2026-03-27T18:52:10Z"
+    "title": "Refactor/nemotron h inherit granitemoehybrid",
+    "updated_at": "2026-03-30T12:23:34Z"
   },
   {
-    "additions": 305,
-    "author": "osman-akkawi",
-    "author_association": "NONE",
-    "body_excerpt": "As **Osman Akkawi**, I am proud to submit this Pull Request which introduces a world-first, unique feature to the `transformers` library alongside essential codebase maintenance. This PR focuses on one goal: making state-of-the-art models\u2026",
-    "changed_files": 7,
+    "additions": 16,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This patch fixes all doctests for the run_doctest job",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45066",
-    "created_at": "2026-03-27T17:04:14Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45114",
+    "created_at": "2026-03-30T11:08:02Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45066/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45066",
+    "files_url": "https://github.com/huggingface/transformers/pull/45114/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45114",
     "labels": [],
     "merged": false,
-    "number": 45066,
+    "number": 45114,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[PR] Unique Enhancement: Transformers Model Advisor & Legacy Cleanup",
-    "updated_at": "2026-03-27T20:31:59Z"
+    "state": "open",
+    "title": "fix: lets fix all doctests",
+    "updated_at": "2026-03-30T11:30:04Z"
   },
   {
-    "additions": 0,
-    "author": "Sai-Suraj-27",
+    "additions": 181,
+    "author": "cyyever",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Removes TensorFlow's `\"TF_CPP_MIN_LOG_LEVEL\"` env var. This is no longer needed since TF/Jax are gone. This `utils/print_env.py` script is being used in CI running tests to print, req useful env vars. ## Code Agent\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45065",
-    "created_at": "2026-03-27T16:43:30Z",
-    "deletions": 3,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45065/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45065",
-    "labels": [],
-    "merged": true,
-    "number": 45065,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Remove unused TensorFlow env var",
-    "updated_at": "2026-03-27T17:24:30Z"
-  },
-  {
-    "additions": 101,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? check modular import can be extremely slow (8mn in CI) we're investigating speeding it up in https://github.com/huggingface/transformers/pull/45046 But we can also shard jobs in CI to mitigate a little bit. This pat\u2026",
+    "body_excerpt": "# What does this PR do? This PR adds GPU Direct Storage (GDS) support for safetensors model loading via `torch.cuda.gds.GdsFile`. GDS is disabled by default, `HF_ENABLE_GDS=1` env is used to enable it. ## Benchmark A100 PCIe 40GB, Samsung\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45064",
-    "created_at": "2026-03-27T16:26:27Z",
-    "deletions": 34,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45064/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45064",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45113",
+    "created_at": "2026-03-30T10:55:35Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45113/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45113",
     "labels": [],
     "merged": false,
-    "number": 45064,
-    "review_comments_count": 0,
+    "number": 45113,
+    "review_comments_count": 3,
     "state": "open",
-    "title": "refactor: shard checkers",
-    "updated_at": "2026-03-27T17:05:37Z"
+    "title": "Add GDS support for safetensors loading ",
+    "updated_at": "2026-03-31T15:17:16Z"
   },
   {
-    "additions": 176,
-    "author": "SunMarc",
+    "additions": 233,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds some features that makes serving more efficient. It shouldn't impact `generate_batch` at all: - Per-request result delivery via callbacks (replaces shared queue contention). Added `_request_callbacks` d\u2026",
-    "changed_files": 3,
+    "body_excerpt": "This PR adds a warmup phase before generation starts, turned on by default. It allows for better diagnostics and a more representative user experience than without warmup, where the cost of wamup is payed during the first request rather th\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45063",
-    "created_at": "2026-03-27T16:07:43Z",
-    "deletions": 30,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45112",
+    "created_at": "2026-03-30T10:43:32Z",
+    "deletions": 46,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45063/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45063",
+    "files_url": "https://github.com/huggingface/transformers/pull/45112/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45112",
     "labels": [],
     "merged": true,
-    "number": 45063,
-    "review_comments_count": 10,
+    "number": 45112,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "CB improvements for serving ",
-    "updated_at": "2026-03-30T18:48:33Z"
+    "title": "[CB] Add warmup feature",
+    "updated_at": "2026-03-31T07:42:18Z"
   },
   {
-    "additions": 55,
-    "author": "ErenAta16",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR adds a regression test for Unicode corruption when decoding `added_tokens` with ByteLevel tokenizers (e.g. GPT-2 family). In affected cases, characters such as `\u010d`, `\u0107`, `\u0111` can decode into control characters (`\\r`, `\\x07`, `\\x11`)\u2026",
-    "changed_files": 2,
+    "additions": 42,
+    "author": "ionut-anghelina",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Several MoE routers applied softmax inside `forward()` but returned the result as `router_logits`. The `load_balancing_loss_func` then applied softmax **again**, computing the aux loss on `softmax(softmax(logits))` which flatt\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45062",
-    "created_at": "2026-03-27T15:23:38Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45111",
+    "created_at": "2026-03-30T08:23:07Z",
+    "deletions": 42,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45062/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45062",
+    "files_url": "https://github.com/huggingface/transformers/pull/45111/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45111",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 45062,
-    "review_comments_count": 1,
+    "number": 45111,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add regression test for ByteLevel added-token Unicode decode corruption",
-    "updated_at": "2026-03-27T23:36:35Z"
+    "title": "Fix double softmax in MoE router load-balancing loss",
+    "updated_at": "2026-03-30T14:09:10Z"
   },
   {
-    "additions": 12,
-    "author": "vasqu",
+    "additions": 7035,
+    "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title, see https://github.com/huggingface/transformers/pull/42435#issuecomment-4143234736",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? [disclaimer: PR was entirely written by Codex where I just nudge it in the right directions, similar to #44285] ### Feature request I'd like to add support for Meta's [SAM 3.1](https://huggingface.co/facebook/sam3.1\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45061",
-    "created_at": "2026-03-27T15:09:50Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45110",
+    "created_at": "2026-03-30T08:19:42Z",
+    "deletions": 29,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45061/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45061",
-    "labels": [
-      "for patch"
-    ],
-    "merged": true,
-    "number": 45061,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[`FA`] Fix BC support for a few versions + add deprecation cycle",
-    "updated_at": "2026-03-27T15:37:13Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/45110/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45110",
+    "labels": [],
+    "merged": false,
+    "number": 45110,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Add SAM 3.1",
+    "updated_at": "2026-03-30T12:33:41Z"
   },
   {
-    "additions": 54,
-    "author": "ErenAta16",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR fixes a regression where PIL-based image/video processors were incorrectly treated as requiring `torchvision`. As a result, `AutoProcessor` / `AutoImageProcessor` could fail in environments without `torchvision`, even though a vali\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45060",
-    "created_at": "2026-03-27T13:43:38Z",
-    "deletions": 3,
+    "additions": 42,
+    "author": "aws-zhanxun",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? `T5Attention.forward` hard-codes `n_heads` and `inner_dim` in `view()` calls. When using PyTorch Tensor Parallelism, `ColwiseParallel` shards the q/k/v projection output dim from `inner_dim` to `inner_dim / tp_size`\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45109",
+    "created_at": "2026-03-30T07:06:19Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45060/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45060",
+    "files_url": "https://github.com/huggingface/transformers/pull/45109/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45109",
     "labels": [],
-    "merged": false,
-    "number": 45060,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix PIL backend fallback when torchvision is unavailable",
-    "updated_at": "2026-03-30T13:50:34Z"
+    "merged": true,
+    "number": 45109,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix T5Attention shape mismatch under Tensor Parallelism",
+    "updated_at": "2026-04-01T16:21:32Z"
   },
   {
-    "additions": 9,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "Some checkpoints, such as https://huggingface.co/omni-research/Tarsier2-Recap-7b, have the wrong `model_type` in their `config.json`. This PR allows advanced users (vLLM) to pass `model_type` into `AutoConfig.from_pretrained` via `kwargs`\u2026",
+    "additions": 1,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Description Some wav2vec2 models (e.g. audio classification variants) have `vocab_size: null` in their `config.json`. The current type annotation `vocab_size: int = 32` causes `huggingface_hub`'s strict dataclass validation to reject `N\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45058",
-    "created_at": "2026-03-27T13:24:05Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45108",
+    "created_at": "2026-03-30T03:06:01Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45058/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45058",
+    "files_url": "https://github.com/huggingface/transformers/pull/45108/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45108",
     "labels": [],
     "merged": true,
-    "number": 45058,
+    "number": 45108,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Allow advanced users to override `model_type` in `AutoConfig.from_pretrained`",
-    "updated_at": "2026-03-27T14:29:53Z"
+    "title": "Fix `Wav2Vec2Config.vocab_size` type to allow `None`",
+    "updated_at": "2026-04-09T14:56:15Z"
   },
   {
-    "additions": 318,
-    "author": "NathanHB",
-    "author_association": "MEMBER",
-    "body_excerpt": "Change model_dump_json() to model_dump() to avoid double JSON encoding. When using continuous batching with stream=false, the response was being double-encoded as a string instead of returning a proper JSON object. Added a UV script to run\u2026",
-    "changed_files": 3,
+    "additions": 1,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Description The `text-to-speech` pipeline crashes when loading models whose `generation_config` contains fields set to `None` (e.g. `use_cache=None`). This is because `TextToAudioPipeline.__init__` blindly copies all generation config v\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45057",
-    "created_at": "2026-03-27T13:02:59Z",
-    "deletions": 37,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45107",
+    "created_at": "2026-03-30T02:32:13Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45057/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45057",
+    "files_url": "https://github.com/huggingface/transformers/pull/45107/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45107",
     "labels": [],
     "merged": true,
-    "number": 45057,
+    "number": 45107,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[serving] Fix continuous batching JSON response serialization",
-    "updated_at": "2026-03-31T13:04:32Z"
+    "title": "Fix `text-to-speech` pipeline crash when generation config contains `None` values",
+    "updated_at": "2026-04-08T17:13:05Z"
   },
   {
-    "additions": 331,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This is mega long due I wanted to check benches. Its not super super huge but a win is a win",
-    "changed_files": 4,
+    "additions": 72,
+    "author": "rpathade",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes `_process_kwargs_parameters` crashing with `AttributeError` when `@auto_docstring` is applied in a module that uses `from __future__ import annotations`. Fixes #45103 ## Root cause `from __future__ import anno\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45056",
-    "created_at": "2026-03-27T11:36:10Z",
-    "deletions": 28,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45105",
+    "created_at": "2026-03-29T23:37:13Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45056/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45056",
+    "files_url": "https://github.com/huggingface/transformers/pull/45105/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45105",
     "labels": [],
     "merged": false,
-    "number": 45056,
-    "review_comments_count": 5,
+    "number": 45105,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "[`auto_docstring`] needs to be only run on __doc__ ",
-    "updated_at": "2026-03-29T12:57:04Z"
+    "title": "Fix @auto_docstring crash with from __future__ import annotations in _process_kwargs_parameters",
+    "updated_at": "2026-03-30T00:20:41Z"
   },
   {
-    "additions": 3,
-    "author": "vasanthrpjan1-boop",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? When `Trainer` saves a checkpoint for a model that is not a `PreTrainedModel` (e.g. a custom `nn.Module`), it only saves the state dict but not the model config. This means `Model.from_pretrained(ckpt_path)` requir\u2026",
+    "additions": 13,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "## Description Fixes #45103 The `@auto_docstring` decorator crashes at import time when applied to a class in a module that uses `from __future__ import annotations`. This is because `from __future__ import annotations` makes all annotatio\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45055",
-    "created_at": "2026-03-27T11:31:10Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45104",
+    "created_at": "2026-03-29T23:26:49Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45055/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45055",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45104/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45104",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 45055,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Save model config in Trainer checkpoints for non-PreTrainedModel models",
-    "updated_at": "2026-03-27T11:31:10Z"
-  },
-  {
-    "additions": 3,
-    "author": "hf-security-analysis[bot]",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Update `.github/workflows/update_metdata.yml` workflow configuration. cc @tarekziade @ydshieh Closes huggingface/tracking-issues#33",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45054",
-    "created_at": "2026-03-27T11:16:11Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45054/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45054",
-    "labels": [],
-    "merged": true,
-    "number": 45054,
+    "number": 45104,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: update update_metdata.yml",
-    "updated_at": "2026-03-27T15:57:14Z"
+    "title": "Fix auto_docstring crash with from __future__ import annotations",
+    "updated_at": "2026-03-30T12:13:20Z"
   },
   {
-    "additions": 1,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes failing [`XCLIPModelIntegrationTests`](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524760869#step:14:1384). [`self.get_attributes()`](https://github.com/huggingface/transformers/\u2026",
-    "changed_files": 1,
+    "additions": 1664,
+    "author": "HemanthSai7",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Co-authored-by: Vishesht27 This PR adds support for codes for the upcoming Nandi series models. We also appreciate the valuable feedback and thorough review provided by @vasqu and @ArthurZucker \ud83e\udd17\ud83d\ude4f",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45053",
-    "created_at": "2026-03-27T11:11:18Z",
-    "deletions": 2,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45101",
+    "created_at": "2026-03-29T20:35:56Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45053/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45053",
+    "files_url": "https://github.com/huggingface/transformers/pull/45101/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45101",
     "labels": [],
     "merged": false,
-    "number": 45053,
-    "review_comments_count": 2,
+    "number": 45101,
+    "review_comments_count": 4,
     "state": "open",
-    "title": "Fix failing `XCLIPModelIntegrationTest`",
-    "updated_at": "2026-04-07T15:31:17Z"
+    "title": "Adding support for Nandi Models",
+    "updated_at": "2026-04-14T09:38:28Z"
   },
   {
     "additions": 1,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? `.gitignore` was not updated when `mlinter` was refactored",
+    "author": "code-runner77",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "Improve wording in accelerator selection documentation # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45052",
-    "created_at": "2026-03-27T10:16:17Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45100",
+    "created_at": "2026-03-29T17:28:59Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45052/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45052",
+    "files_url": "https://github.com/huggingface/transformers/pull/45100/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45100",
     "labels": [],
-    "merged": true,
-    "number": 45052,
+    "merged": false,
+    "number": 45100,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: Fix mlinter cache location",
-    "updated_at": "2026-03-27T10:26:38Z"
+    "title": "Update accelerator_selection.md",
+    "updated_at": "2026-03-30T13:40:51Z"
   },
   {
     "additions": 2,
-    "author": "albertvillanova",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fix NaN weights on non-rank-0 FSDP processes by using `zeros_like` instead of `empty_like` in `_move_missing_keys_from_meta_to_device` Follow-up to: - #44473 See related downstream issue in `trl` : - https://github.com/huggingface/trl/issu\u2026",
+    "author": "zendy199x",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review an\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45050",
-    "created_at": "2026-03-27T09:19:32Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45098",
+    "created_at": "2026-03-29T15:50:04Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45050/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45050",
+    "files_url": "https://github.com/huggingface/transformers/pull/45098/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45098",
     "labels": [],
-    "merged": true,
-    "number": 45050,
-    "review_comments_count": 8,
+    "merged": false,
+    "number": 45098,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix NaN weights on non-rank-0 FSDP processes",
-    "updated_at": "2026-04-13T09:38:32Z"
+    "title": "fix: incomplete string literal causes syntax error in config docstring checker",
+    "updated_at": "2026-03-30T13:27:17Z"
   },
   {
-    "additions": 6,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45030 After the config validation, all validations are now run after config is initialized. So this config has been wrong from the beginning but we didn't com\u2026",
-    "changed_files": 1,
+    "additions": 771,
+    "author": "baonudesifeizhai",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR extends the InternVL conversion script to support the old `OpenGVLab/InternVL2-1B` and `OpenGVLab/InternVL2-2B` checkpoints. These checkpoints currently rely on remote code and are problematic for downstream\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45049",
-    "created_at": "2026-03-27T08:46:46Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45049/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45049",
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45097",
+    "created_at": "2026-03-29T05:29:42Z",
+    "deletions": 30,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45097/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45097",
+    "labels": [],
+    "merged": false,
+    "number": 45097,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add old InternVL2-1B/2B support to the InternVL conversion script #45092",
+    "updated_at": "2026-04-02T07:59:13Z"
+  },
+  {
+    "additions": 5,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "## Problem Old remote-code checkpoints (like InternVL2) perform real-tensor operations during model construction (e.g., calling `.item()` on tensors). This causes `RuntimeError: Tensor.item() cannot be called on meta tensors` when models a\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45096",
+    "created_at": "2026-03-29T05:13:12Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45096/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45096",
     "labels": [
-      "for patch"
+      "Code agent slop"
     ],
-    "merged": true,
-    "number": 45049,
+    "merged": false,
+    "number": 45096,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix when RoPE params are in kwargs",
-    "updated_at": "2026-03-27T16:28:13Z"
+    "title": "Fix: Skip meta device initialization for remote code models",
+    "updated_at": "2026-03-30T12:13:08Z"
   },
   {
-    "additions": 4,
-    "author": "Sai-Suraj-27",
+    "additions": 100,
+    "author": "HanFa",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes failing [`SmolLM3IntegrationTest:test_model_3b_long_prompt`](https://github.com/huggingface/transformers/actions/runs/23629638266/job/68826332952#step:14:216). `SmolLM3` has [`do_sample=True` by default](https\u2026",
-    "changed_files": 1,
+    "body_excerpt": "When a config class has been explicitly registered via AutoConfig.register(), it should take precedence over auto_map remote code. Previously, `trust_remote_code=True` with auto_map.AutoConfig in config.json would always load remote code,\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45048",
-    "created_at": "2026-03-27T08:45:26Z",
-    "deletions": 4,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45094",
+    "created_at": "2026-03-29T04:21:29Z",
+    "deletions": 17,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45048/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45048",
+    "files_url": "https://github.com/huggingface/transformers/pull/45094/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45094",
     "labels": [],
     "merged": true,
-    "number": 45048,
+    "number": 45094,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failing `SmolLM3IntegrationTest`",
-    "updated_at": "2026-03-27T14:18:05Z"
+    "title": "fix: prefer registered config over remote code in AutoConfig.from_pretrained",
+    "updated_at": "2026-03-31T14:56:49Z"
   },
   {
-    "additions": 503,
-    "author": "Akshay404error",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "Fixes #44568 \u2014 restores add_special_tokens behavior for mDeBERTa tokenizer and non-persistent buffers in v5 ## What does this PR do? This PR fixes two v5 regressions: 1. `add_special_tokens=True` no longer added BOS/EOS tokens for the `mic\u2026",
-    "changed_files": 5,
+    "additions": 6,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "This PR fixes the unexpected behaviour of helper function `_get_feat_extract_output_lengths` in qwen3_omni_moe as reported in #45083. ## Problem The current implementation incorrectly calculates the output length of the convolutional layer\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45047",
-    "created_at": "2026-03-27T08:33:01Z",
-    "deletions": 14,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45091",
+    "created_at": "2026-03-29T00:37:06Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45047/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45047",
+    "files_url": "https://github.com/huggingface/transformers/pull/45091/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45091",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 45047,
+    "number": 45091,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: restore add_special_tokens behavior for mDeBERTa tokenizer and n\u2026",
-    "updated_at": "2026-03-27T13:15:58Z"
+    "title": "Fix _get_feat_extract_output_lengths in qwen3_omni_moe",
+    "updated_at": "2026-03-30T12:12:57Z"
   },
   {
-    "additions": 548,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Investigate speedups on modular conversion",
-    "changed_files": 2,
+    "additions": 17,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "## Description Fixes #45084 The `VoxtralProcessor.apply_chat_template` method was calling `_get_template_variables(chat_template)` without first checking if `chat_template` was None. This caused a `TypeError: Can't compile non template nod\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45046",
-    "created_at": "2026-03-27T08:31:58Z",
-    "deletions": 42,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45046/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45046",
-    "labels": [],
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45090",
+    "created_at": "2026-03-29T00:35:00Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45090/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45090",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 45046,
+    "number": 45090,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "investigate modular conversion speedups",
-    "updated_at": "2026-03-27T16:33:45Z"
+    "state": "closed",
+    "title": "Fix TypeError when chat_template is None in VoxtralProcessor",
+    "updated_at": "2026-03-30T12:10:25Z"
   },
   {
-    "additions": 5095,
-    "author": "Lidang-Jiang",
+    "additions": 4,
+    "author": "Krishnachaitanyakc",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Isolate dependencies, make PIL independant from Torchvision backend Fixes #45042 PR #45029 added `@requires(backends=(\"vision\", \"torch\", \"torchvision\"))` to 67 PIL backend `image_processing_pil_*.py` files. This causes PIL backend class\u2026",
-    "changed_files": 188,
+    "body_excerpt": "## Summary Fixes #45003 `_can_set_attn_implementation` and `_can_set_experts_implementation` in `PreTrainedModel` use `sys.modules[cls.__module__]`, which raises `KeyError` when a module has been removed from `sys.modules` at runtime. This\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45045",
-    "created_at": "2026-03-27T08:19:19Z",
-    "deletions": 1270,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45089",
+    "created_at": "2026-03-28T16:44:06Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45045/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45045",
+    "files_url": "https://github.com/huggingface/transformers/pull/45089/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45089",
     "labels": [
-      "for patch"
+      "Code agent slop"
     ],
-    "merged": true,
-    "number": 45045,
-    "review_comments_count": 24,
+    "merged": false,
+    "number": 45089,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Bugfix] Remove incorrect torchvision requirement from PIL backend image processors",
-    "updated_at": "2026-03-30T07:25:49Z"
+    "title": "fix: use sys.modules.get() to avoid KeyError in modeling_utils",
+    "updated_at": "2026-03-30T14:19:30Z"
   },
   {
-    "additions": 35,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fix issue in https://github.com/huggingface/transformers/issues/44792. @zucchini-nlp @ydshieh pls help review, thx!",
+    "additions": 6,
+    "author": "knQzx",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "corrects the conv output length calculation in _get_feat_extract_output_lengths which was computing wrong values for the audio encoder. fixes #45083",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45044",
-    "created_at": "2026-03-27T07:50:21Z",
-    "deletions": 26,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45088",
+    "created_at": "2026-03-28T16:40:07Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45044/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45044",
+    "files_url": "https://github.com/huggingface/transformers/pull/45088/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45088",
     "labels": [],
-    "merged": true,
-    "number": 45044,
-    "review_comments_count": 11,
+    "merged": false,
+    "number": 45088,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix bug for janus model image generation",
-    "updated_at": "2026-04-02T02:46:14Z"
+    "title": "fix audio encoder output length formula in qwen3_omni_moe",
+    "updated_at": "2026-03-30T12:53:42Z"
   },
   {
-    "additions": 31,
-    "author": "Lidang-Jiang",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #45003. `_can_set_attn_implementation` and `_can_set_experts_implementation` in `modeling_utils.py` crash with `KeyError` when `cls.__module__` is absent from `sys.modules`. This happens in real-world scenari\u2026",
-    "changed_files": 2,
+    "additions": 5,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "This PR fixes issue #45071 where mypy type checking was broken for PretrainedConfig subclasses. ## Problem In transformers v5.4.0, the PretrainedConfig class was converted to a dataclass with a wrapper around __init__ to accept arbitrary k\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45043",
-    "created_at": "2026-03-27T06:07:20Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45087",
+    "created_at": "2026-03-28T16:38:11Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45043/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45043",
+    "files_url": "https://github.com/huggingface/transformers/pull/45087/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45087",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 45043,
+    "number": 45087,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Bugfix] Use sys.modules.get() to avoid KeyError in modeling_utils",
-    "updated_at": "2026-03-27T13:02:19Z"
+    "title": "Fix PretrainedConfig type checking with mypy",
+    "updated_at": "2026-03-30T12:12:48Z"
   },
   {
-    "additions": 677,
-    "author": "JJJYmmm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR addresses the performance issues observed with nn.Conv3d across different PyTorch/cuDNN, such as https://github.com/vllm-project/vllm/pull/27418, https://mp.weixin.qq.com/s/hKRIpB561EdrMY8cbg1hEw. We replace\u2026",
-    "changed_files": 7,
+    "additions": 3,
+    "author": "knQzx",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "the function accesses backend_tokenizer.pre_tokenizer but the tokenizer passed is already the raw rust object, so it should be pre_tokenizer directly. fixes #45081",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45041",
-    "created_at": "2026-03-27T03:50:54Z",
-    "deletions": 1,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45086",
+    "created_at": "2026-03-28T16:37:49Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45041/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45041",
+    "files_url": "https://github.com/huggingface/transformers/pull/45086/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45086",
     "labels": [],
     "merged": false,
-    "number": 45041,
-    "review_comments_count": 59,
+    "number": 45086,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "[inference_fusion] convert conv3d patch embed to linear",
-    "updated_at": "2026-04-10T10:34:17Z"
+    "title": "fix AttributeError in _patch_mistral_regex",
+    "updated_at": "2026-03-28T16:37:49Z"
   },
   {
-    "additions": 13,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "- vision models: @yonigozlan @molbap - CIs: @ydshieh when running tests/models/video_llama_3/test_modeling_video_llama_3.py::VideoLlama3IntegrationTest all fail cause by lm_head.weight is missing.",
+    "additions": 7,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "This PR fixes #45072. ## Changes ### SwitchTransformers - Fixed a bug in `SwitchTransformersTop1Router.forward()` where `router_logits` was being reassigned to the max probability values instead of keeping the raw logits from the classifie\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45040",
-    "created_at": "2026-03-27T02:56:58Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45085",
+    "created_at": "2026-03-28T16:28:27Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45040/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45040",
+    "files_url": "https://github.com/huggingface/transformers/pull/45085/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45085",
     "labels": [],
     "merged": false,
-    "number": 45040,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Llama3 video fix",
-    "updated_at": "2026-03-27T10:55:26Z"
+    "number": 45085,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix dtype mismatches in SwitchTransformers and TimmWrapperModel for bfloat16",
+    "updated_at": "2026-03-30T11:25:14Z"
   },
   {
-    "additions": 26,
-    "author": "Lidang-Jiang",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44716 - Propagate `interpolate_pos_encoding` parameter through `PixioModel.forward()`, `PixioBackbone.forward()`, and `PixioEmbeddings.forward()` down to `PixioPatchEmbeddings.forward()` - Follows the same pattern used by\u2026",
+    "additions": 143,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR updates the conversion script of VidEoMT to convert all remaining checkpoints. Find them here: https://huggingface.co/papers/2602.17807",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45039",
-    "created_at": "2026-03-27T02:54:31Z",
-    "deletions": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45082",
+    "created_at": "2026-03-28T14:07:12Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45039/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45039",
+    "files_url": "https://github.com/huggingface/transformers/pull/45082/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45082",
     "labels": [],
     "merged": false,
-    "number": 45039,
+    "number": 45082,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[Bugfix] Propagate interpolate_pos_encoding through Pixio model",
-    "updated_at": "2026-03-27T12:57:30Z"
+    "state": "open",
+    "title": "[VidEoMT] Update conversion script",
+    "updated_at": "2026-03-28T14:16:54Z"
   },
   {
-    "additions": 1,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 13,
+    "author": "joaquinhuigomez",
+    "author_association": "NONE",
+    "body_excerpt": "## Root cause The v5.4.0 release converted `PreTrainedConfig` from a regular class to a `@dataclass`. This changes how Pydantic handles it when used as a field type in a `BaseModel`: instead of treating it as an opaque arbitrary type, Pyda\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45038",
-    "created_at": "2026-03-27T00:18:35Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45080",
+    "created_at": "2026-03-28T12:13:57Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45038/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45038",
+    "files_url": "https://github.com/huggingface/transformers/pull/45080/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45080",
     "labels": [],
-    "merged": true,
-    "number": 45038,
+    "merged": false,
+    "number": 45080,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "style was missing sorry @ydshieh :)",
-    "updated_at": "2026-03-27T00:28:23Z"
+    "title": "Fix PreTrainedConfig as Pydantic field type after dataclass conversion",
+    "updated_at": "2026-03-29T19:36:24Z"
   },
   {
-    "additions": 1,
-    "author": "asuryateja",
+    "additions": 50,
+    "author": "javierdejesusda",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026_interface docs The `custom_attention` function definition in the attention_interface documentation was missing a colon at the end of the return type annotation, making it invalid Python syntax. # What does this PR do? <!-- Congratulation\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## What does this PR do? Fixes #35141 When `tie_word_embeddings=False`, calling `resize_token_embeddings()` then `post_init()` overwrites the LM head weights with random values. This happens because `_get_resized_lm_head()` returns a new `\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45037",
-    "created_at": "2026-03-26T23:48:04Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45079",
+    "created_at": "2026-03-28T00:06:03Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45037/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45037",
+    "files_url": "https://github.com/huggingface/transformers/pull/45079/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45079",
     "labels": [],
-    "merged": false,
-    "number": 45037,
+    "merged": true,
+    "number": 45079,
     "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix resized LM head weights being overwritten by post_init",
+    "updated_at": "2026-04-02T14:13:31Z"
+  },
+  {
+    "additions": 33,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "fixes fallback https://github.com/huggingface/transformers/issues/44993",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45078",
+    "created_at": "2026-03-27T23:06:36Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45078/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45078",
+    "labels": [],
+    "merged": false,
+    "number": 45078,
+    "review_comments_count": 5,
     "state": "open",
-    "title": "add missing colon in custom_attention function signature in attention\u2026",
-    "updated_at": "2026-03-27T00:17:55Z"
+    "title": "throw error when conversion required",
+    "updated_at": "2026-04-13T09:09:31Z"
   },
   {
-    "additions": 15,
-    "author": "matdou",
+    "additions": 312,
+    "author": "dagecko",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #45030 Configs like `tiny-random/glm-4v` store `rope_theta` at the top level of `config.json` alongside a `rope_scaling` dict (legacy format). For config classes that don't declare `rope_parameters` as a datac\u2026",
-    "changed_files": 1,
+    "body_excerpt": "Re-submission of #45010. Had a problem with my fork and had to delete it, which closed the original PR. Apologies for the noise. @tarekziade @ydshieh I noticed you fixed the critical findings from the original PR, which is great. This resu\u2026",
+    "changed_files": 22,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45036",
-    "created_at": "2026-03-26T23:00:35Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45077",
+    "created_at": "2026-03-27T22:20:56Z",
+    "deletions": 312,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45036/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45036",
+    "files_url": "https://github.com/huggingface/transformers/pull/45077/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45077",
     "labels": [],
     "merged": false,
-    "number": 45036,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "[fix] BC for legacy configs with top-level rope_theta when rope_parameters is set via rope_scaling",
-    "updated_at": "2026-03-28T23:47:33Z"
+    "number": 45077,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix: pin 50 unpinned actions to commit SHA, extract 1 secret to env var",
+    "updated_at": "2026-03-27T22:20:56Z"
   },
   {
-    "additions": 1,
-    "author": "asuryateja",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "The conversion operations table was missing PermuteForRope. Added it with its reverse (itself), consistent with how other operations are documented. PermuteForRope is self-inverse applying it twice returns the original tensor layout. # Wha\u2026",
-    "changed_files": 1,
+    "additions": 376,
+    "author": "osman-akkawi",
+    "author_association": "NONE",
+    "body_excerpt": "As Osman Akkawi, I am proud to submit this comprehensive Pull Request which introduces two world-first, unique innovations to the transformers library alongside essential codebase maintenance. This PR transforms how users interact with and\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45035",
-    "created_at": "2026-03-26T21:05:17Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45076",
+    "created_at": "2026-03-27T20:30:37Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45035/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45035",
-    "labels": [],
-    "merged": true,
-    "number": 45035,
+    "files_url": "https://github.com/huggingface/transformers/pull/45076/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45076",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 45076,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: add PermuteForRope to conversion operations reverse table",
-    "updated_at": "2026-03-26T22:09:53Z"
+    "title": "Osman-Level Innovations: Hardware-Aware Advisor & Selective Weight Surgery CLI",
+    "updated_at": "2026-04-06T10:49:18Z"
   },
   {
-    "additions": 113,
-    "author": "sdharani91",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This is a follow up to https://github.com/huggingface/transformers/pull/44867 This PR fixes Qwen3.5 padding-free packed inputs on the linear-attention fast path by consuming collator-provided packed metadata. The li\u2026",
-    "changed_files": 6,
+    "additions": 5184,
+    "author": "thisisiron",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds the **DeepSeek-OCR2** model. ### Reference - Arxiv Paper: [DeepSeek-OCR 2: Visual Causal Flow](https://arxiv.org/abs/2601.20552) - Huggingface hub: [deepseek-ai/DeepSeek-OCR-2](https://huggingface.co/deepseek-a\u2026",
+    "changed_files": 21,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45034",
-    "created_at": "2026-03-26T20:52:51Z",
-    "deletions": 9,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45075",
+    "created_at": "2026-03-27T20:14:27Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45034/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45034",
+    "files_url": "https://github.com/huggingface/transformers/pull/45075/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45075",
     "labels": [],
     "merged": false,
-    "number": 45034,
-    "review_comments_count": 38,
+    "number": 45075,
+    "review_comments_count": 53,
     "state": "open",
-    "title": "Pass packed boundary metadata to Qwen3.5 linear-attention fast kernels from data collator",
-    "updated_at": "2026-04-09T16:03:09Z"
+    "title": "Add Deepseek-OCR-2 model",
+    "updated_at": "2026-04-14T10:57:09Z"
   },
   {
-    "additions": 3,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "In https://github.com/huggingface/transformers/pull/43514, `BaseImageProcessorFast` became `BaseImageProcessor` and `_further_process_kwargs` was renamed to `_standardize_kwargs` This PR adds some BC for the old name of this method.",
-    "changed_files": 1,
+    "additions": 12,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following dtype mismatch use cases were identified and fixed in this PR: \u2192 **Switch Transformers:** [7938e91fa](https://github.com/harshaljanjani/transformers/commit/7938e91faabb051f3a001cd39c173d4697c2d81c) r\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45033",
-    "created_at": "2026-03-26T20:36:40Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45074",
+    "created_at": "2026-03-27T20:02:28Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45033/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45033",
+    "files_url": "https://github.com/huggingface/transformers/pull/45074/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45074",
     "labels": [],
     "merged": true,
-    "number": 45033,
-    "review_comments_count": 0,
+    "number": 45074,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Add BC for `_further_process_kwargs`",
-    "updated_at": "2026-03-26T21:01:32Z"
+    "title": "fix(models): Fix dtype mismatch in SwitchTransformers and TimmWrapperModel",
+    "updated_at": "2026-04-02T13:59:46Z"
   },
   {
-    "additions": 163,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Use multi runners to get new failing tests in a CI run.",
+    "additions": 1239,
+    "author": "Aravind-11",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "What does this pr do? - Add modular_owlvit.py inheriting CLIP vision/text embeddings, MLP, encoder layer, encoder - Import box IoU helpers from loss_for_object_detection; eager_attention from BERT - Regenerate modeling_owlvit.py via modula\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45032",
-    "created_at": "2026-03-26T18:48:18Z",
-    "deletions": 67,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45032/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45032",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45073",
+    "created_at": "2026-03-27T20:00:41Z",
+    "deletions": 135,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45073/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45073",
     "labels": [],
-    "merged": true,
-    "number": 45032,
+    "merged": false,
+    "number": 45073,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Use multi runners to check new failing tests in a CI run",
-    "updated_at": "2026-03-26T18:59:08Z"
+    "state": "open",
+    "title": "Refactor OwlViT to modular Transformers",
+    "updated_at": "2026-04-09T15:42:31Z"
   },
   {
-    "additions": 6,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Follow-up for https://github.com/huggingface/transformers/pull/44931, which added weight tying for Camembert. Only the CamembertForMaskedLM class had the right _tied_weights_keys, the CamembertForCausalLM had the in\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "Fr0do",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a `TypeError` in `_check_received_keys` (line 919 of `modeling_rope_utils.py`) where `received_keys -= ignore_keys` fails when `ignore_keys` is a `list` instead of a `set`. ## Root cause Model configs (Qwen3.\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45031",
-    "created_at": "2026-03-26T18:28:54Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45069",
+    "created_at": "2026-03-27T19:21:01Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45031/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45031",
+    "files_url": "https://github.com/huggingface/transformers/pull/45069/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45069",
     "labels": [],
     "merged": true,
-    "number": 45031,
+    "number": 45069,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`fix`] Use the correct _tied_weights_keys for CamembertForCausalLM",
-    "updated_at": "2026-03-26T18:57:29Z"
+    "title": "Fix TypeError in rope validation when ignore_keys is a list",
+    "updated_at": "2026-03-30T11:41:12Z"
   },
   {
-    "additions": 478,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Release workflow is failing",
-    "changed_files": 101,
+    "additions": 22,
+    "author": "aarushisingh04",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### what does this PR do? this pr enables `trainer.train(resume_from_checkpoint=...)` to accept hugging face hub repository ids. instead of only local paths, users can now pass `user/repo@revision` and the trainer will automatically downlo\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45029",
-    "created_at": "2026-03-26T18:04:37Z",
-    "deletions": 768,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45067",
+    "created_at": "2026-03-27T18:26:03Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45029/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45029",
-    "labels": [],
-    "merged": true,
-    "number": 45029,
-    "review_comments_count": 7,
-    "state": "closed",
-    "title": "Fix release full",
-    "updated_at": "2026-03-27T06:34:00Z"
-  },
-  {
-    "additions": 2015,
-    "author": "3outeille",
-    "author_association": "MEMBER",
-    "body_excerpt": "- TODO - how will dtensor works with quantization ? - how will dtensor works with kernels ? - Needs end to end test (combine `verify_all_loss` -> `training` with saving + loading back for generate ?) - double check Save FSDP + TP - do test\u2026",
-    "changed_files": 25,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45028",
-    "created_at": "2026-03-26T17:50:07Z",
-    "deletions": 1881,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45028/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45028",
+    "files_url": "https://github.com/huggingface/transformers/pull/45067/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45067",
     "labels": [],
     "merged": false,
-    "number": 45028,
-    "review_comments_count": 13,
+    "number": 45067,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "TP refactor for FSDP + TP integration",
-    "updated_at": "2026-04-13T09:42:39Z"
+    "title": "feat: trainer resume_from_checkpoint support hub downloads (#43375)",
+    "updated_at": "2026-03-27T18:52:10Z"
   },
   {
-    "additions": 714,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "# Summary This PR adds per-request logits processors and overalls the way CB handles logits processors. It introduces batched logits processing with per-request parameters for continuous batching, enabling each request in a batch to use di\u2026",
-    "changed_files": 11,
+    "additions": 305,
+    "author": "osman-akkawi",
+    "author_association": "NONE",
+    "body_excerpt": "As **Osman Akkawi**, I am proud to submit this Pull Request which introduces a world-first, unique feature to the `transformers` library alongside essential codebase maintenance. This PR focuses on one goal: making state-of-the-art models\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45026",
-    "created_at": "2026-03-26T17:00:07Z",
-    "deletions": 202,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45066",
+    "created_at": "2026-03-27T17:04:14Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45026/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45026",
+    "files_url": "https://github.com/huggingface/transformers/pull/45066/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45066",
     "labels": [],
-    "merged": true,
-    "number": 45026,
-    "review_comments_count": 32,
+    "merged": false,
+    "number": 45066,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] Add per-request logits processors",
-    "updated_at": "2026-04-03T16:44:11Z"
+    "title": "[PR] Unique Enhancement: Transformers Model Advisor & Legacy Cleanup",
+    "updated_at": "2026-03-27T20:31:59Z"
   },
   {
-    "additions": 2,
-    "author": "layla1824",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "This PR adds a clarification comment regarding the behavior of rotary_pct. Currently, rotary_pct may reset to its default value (0.25) after reload due to the use of kwargs.pop. This note helps developers better understand this behavior.",
+    "additions": 0,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Removes TensorFlow's `\"TF_CPP_MIN_LOG_LEVEL\"` env var. This is no longer needed since TF/Jax are gone. This `utils/print_env.py` script is being used in CI running tests to print, req useful env vars. ## Code Agent\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45025",
-    "created_at": "2026-03-26T16:48:53Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45065",
+    "created_at": "2026-03-27T16:43:30Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45025/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45025",
+    "files_url": "https://github.com/huggingface/transformers/pull/45065/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45065",
     "labels": [],
-    "merged": false,
-    "number": 45025,
+    "merged": true,
+    "number": 45065,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update configuration_Clarify rotary_pct reset behavior in GPTNeoXConfiggpt_neox.py",
-    "updated_at": "2026-03-27T09:05:48Z"
+    "title": "Remove unused TensorFlow env var",
+    "updated_at": "2026-03-27T17:24:30Z"
   },
   {
-    "additions": 1,
+    "additions": 101,
     "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? `tf` and `flax` are long gone (unknown extras generate only warnings so this slipped through the cracks)",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? check modular import can be extremely slow (8mn in CI) we're investigating speeding it up in https://github.com/huggingface/transformers/pull/45046 But we can also shard jobs in CI to mitigate a little bit. This pat\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45024",
-    "created_at": "2026-03-26T16:21:30Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45024/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45024",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45064",
+    "created_at": "2026-03-27T16:26:27Z",
+    "deletions": 34,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45064/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45064",
     "labels": [],
-    "merged": true,
-    "number": 45024,
+    "merged": false,
+    "number": 45064,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "chore: remove old extras",
-    "updated_at": "2026-03-27T11:16:03Z"
+    "state": "open",
+    "title": "refactor: shard checkers",
+    "updated_at": "2026-03-27T17:05:37Z"
   },
   {
-    "additions": 2529,
-    "author": "eustlb",
+    "additions": 176,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? **Integration notes:** For now, this integration does not load mel filters from the checkpoint. The original model was trained backpropagating gradients in it, but we saw previously (with parakeet-ctc) that this doe\u2026",
-    "changed_files": 19,
+    "body_excerpt": "# What does this PR do? This PR adds some features that makes serving more efficient. It shouldn't impact `generate_batch` at all: - Per-request result delivery via callbacks (replaces shared queue contention). Added `_request_callbacks` d\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45023",
-    "created_at": "2026-03-26T15:58:08Z",
-    "deletions": 8,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45063",
+    "created_at": "2026-03-27T16:07:43Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45023/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45023",
-    "labels": [
-      "New model",
-      "Audio"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45063/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45063",
+    "labels": [],
     "merged": true,
-    "number": 45023,
-    "review_comments_count": 13,
+    "number": 45063,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "Add cohere asr",
-    "updated_at": "2026-03-26T22:48:16Z"
+    "title": "CB improvements for serving ",
+    "updated_at": "2026-03-30T18:48:33Z"
   },
   {
-    "additions": 1,
-    "author": "popotest",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "additions": 55,
+    "author": "ErenAta16",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR adds a regression test for Unicode corruption when decoding `added_tokens` with ByteLevel tokenizers (e.g. GPT-2 family). In affected cases, characters such as `\u010d`, `\u0107`, `\u0111` can decode into control characters (`\\r`, `\\x07`, `\\x11`)\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45022",
-    "created_at": "2026-03-26T15:33:19Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45062",
+    "created_at": "2026-03-27T15:23:38Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45022/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45022",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45062/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45062",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 45022,
-    "review_comments_count": 0,
+    "number": 45062,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Update _config.py",
-    "updated_at": "2026-03-27T13:17:36Z"
+    "title": "Add regression test for ByteLevel added-token Unicode decode corruption",
+    "updated_at": "2026-03-27T23:36:35Z"
   },
   {
-    "additions": 2,
-    "author": "hf-security-analysis[bot]",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Update `.github/workflows/anti-slop.yml` workflow configuration. cc @tarekziade Closes huggingface/tracking-issues#30",
-    "changed_files": 1,
+    "additions": 12,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, see https://github.com/huggingface/transformers/pull/42435#issuecomment-4143234736",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45021",
-    "created_at": "2026-03-26T13:40:18Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45061",
+    "created_at": "2026-03-27T15:09:50Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45021/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45021",
+    "files_url": "https://github.com/huggingface/transformers/pull/45061/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45061",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 45061,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`FA`] Fix BC support for a few versions + add deprecation cycle",
+    "updated_at": "2026-03-27T15:37:13Z"
+  },
+  {
+    "additions": 54,
+    "author": "ErenAta16",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR fixes a regression where PIL-based image/video processors were incorrectly treated as requiring `torchvision`. As a result, `AutoProcessor` / `AutoImageProcessor` could fail in environments without `torchvision`, even though a vali\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45060",
+    "created_at": "2026-03-27T13:43:38Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45060/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45060",
     "labels": [],
     "merged": false,
-    "number": 45021,
+    "number": 45060,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "chore: update anti-slop.yml",
-    "updated_at": "2026-03-26T13:53:57Z"
+    "state": "open",
+    "title": "Fix PIL backend fallback when torchvision is unavailable",
+    "updated_at": "2026-03-30T13:50:34Z"
   },
   {
-    "additions": 21,
-    "author": "javierdejesusda",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44877 Loading `ibm-granite/granite-4.0-1b-speech` fails with `StrictDataclassFieldValidationError` because its config.json stores `embedding_multiplier` and `logits_scaling` as integers (e.g. `12`, `8`), but\u2026",
-    "changed_files": 4,
+    "additions": 9,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "Some checkpoints, such as https://huggingface.co/omni-research/Tarsier2-Recap-7b, have the wrong `model_type` in their `config.json`. This PR allows advanced users (vLLM) to pass `model_type` into `AutoConfig.from_pretrained` via `kwargs`\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45019",
-    "created_at": "2026-03-26T11:19:19Z",
-    "deletions": 12,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45058",
+    "created_at": "2026-03-27T13:24:05Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45019/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45019",
+    "files_url": "https://github.com/huggingface/transformers/pull/45058/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45058",
     "labels": [],
     "merged": true,
-    "number": 45019,
-    "review_comments_count": 2,
+    "number": 45058,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix GraniteConfig type hints to accept int for multiplier fields",
-    "updated_at": "2026-03-27T09:30:17Z"
+    "title": "Allow advanced users to override `model_type` in `AutoConfig.from_pretrained`",
+    "updated_at": "2026-03-27T14:29:53Z"
   },
   {
-    "additions": 1526,
-    "author": "zucchini-nlp",
+    "additions": 318,
+    "author": "NathanHB",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? I thought a lot about how to make this dynamic for devs (who add models etc.) and keep static for users. The whole thing of automatically inferring config and model-type based on code runs with AST, so we don't have\u2026",
-    "changed_files": 42,
+    "body_excerpt": "Change model_dump_json() to model_dump() to avoid double JSON encoding. When using continuous batching with stream=false, the response was being double-encoded as a string instead of returning a proper JSON object. Added a UV script to run\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45018",
-    "created_at": "2026-03-26T11:18:13Z",
-    "deletions": 1399,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45057",
+    "created_at": "2026-03-27T13:02:59Z",
+    "deletions": 37,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45018/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45018",
+    "files_url": "https://github.com/huggingface/transformers/pull/45057/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45057",
     "labels": [],
-    "merged": false,
-    "number": 45018,
-    "review_comments_count": 7,
-    "state": "open",
-    "title": "Dynamic auto mapping (PoC)",
-    "updated_at": "2026-04-13T09:20:21Z"
+    "merged": true,
+    "number": 45057,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[serving] Fix continuous batching JSON response serialization",
+    "updated_at": "2026-03-31T13:04:32Z"
   },
   {
-    "additions": 574,
-    "author": "JaredforReal",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? ### Get the rope operation right Before: NeoX split-half style After: GPT-J/interleaved style(`interleaved=True` same as `is_neox_style=Flase`) the right one ### Get rid of `F.relu` Reason: - `F.relu` works with `ac\u2026",
-    "changed_files": 5,
+    "additions": 331,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This is mega long due I wanted to check benches. Its not super super huge but a win is a win",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45017",
-    "created_at": "2026-03-26T09:21:10Z",
-    "deletions": 49,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45056",
+    "created_at": "2026-03-27T11:36:10Z",
+    "deletions": 28,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45017/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45017",
+    "files_url": "https://github.com/huggingface/transformers/pull/45056/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45056",
     "labels": [],
     "merged": false,
-    "number": 45017,
+    "number": 45056,
     "review_comments_count": 5,
     "state": "open",
-    "title": "[WIP][Fix] GLM 5 set `apply_rotary_pos_emb` to `is_neox_style=False` && remove `F.relu()`",
-    "updated_at": "2026-04-13T09:29:16Z"
+    "title": "[`auto_docstring`] needs to be only run on __doc__ ",
+    "updated_at": "2026-03-29T12:57:04Z"
   },
   {
-    "additions": 64,
-    "author": "inisis",
+    "additions": 3,
+    "author": "vasanthrpjan1-boop",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## What does this PR do? When `Trainer` saves a checkpoint for a model that is not a `PreTrainedModel` (e.g. a custom `nn.Module`), it only saves the state dict but not the model config. This means `Model.from_pretrained(ckpt_path)` requir\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45016",
-    "created_at": "2026-03-26T09:09:41Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45055",
+    "created_at": "2026-03-27T11:31:10Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45016/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45016",
+    "files_url": "https://github.com/huggingface/transformers/pull/45055/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45055",
     "labels": [],
     "merged": false,
-    "number": 45016,
+    "number": 45055,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: glm5 inference bug",
-    "updated_at": "2026-03-26T12:10:21Z"
+    "state": "open",
+    "title": "Save model config in Trainer checkpoints for non-PreTrainedModel models",
+    "updated_at": "2026-03-27T11:31:10Z"
   },
   {
-    "additions": 55,
-    "author": "pnehete23",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes a `KeyError` in `_can_set_attn_implementation` and `_can_set_experts_implementation` when a model's module is absent from `sys.modules`. Fixes #45003 ## Root Cause Both `_can_set_attn_implementation` (line 19\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "hf-security-analysis[bot]",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Update `.github/workflows/update_metdata.yml` workflow configuration. cc @tarekziade @ydshieh Closes huggingface/tracking-issues#33",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45015",
-    "created_at": "2026-03-26T08:52:10Z",
-    "deletions": 6,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45054",
+    "created_at": "2026-03-27T11:16:11Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45015/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45015",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 45015,
+    "files_url": "https://github.com/huggingface/transformers/pull/45054/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45054",
+    "labels": [],
+    "merged": true,
+    "number": 45054,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: guard sys.modules access in _can_set_attn/experts_implementation",
-    "updated_at": "2026-03-26T12:15:50Z"
+    "title": "chore: update update_metdata.yml",
+    "updated_at": "2026-03-27T15:57:14Z"
   },
   {
-    "additions": 16,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? #30674 refactors the way we obtain CircleCI test files to run for each job. It always puts [\"tests\"] for `tests_hub`, so each commit of each PR will run it, no matter if there is any change to codebase. Let's reduce\u2026",
+    "additions": 1,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes failing [`XCLIPModelIntegrationTests`](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524760869#step:14:1384). [`self.get_attributes()`](https://github.com/huggingface/transformers/\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45014",
-    "created_at": "2026-03-26T08:52:04Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45053",
+    "created_at": "2026-03-27T11:11:18Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45014/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45014",
+    "files_url": "https://github.com/huggingface/transformers/pull/45053/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45053",
     "labels": [],
-    "merged": true,
-    "number": 45014,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 45053,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Don't run `tests_hub` if no tests found",
-    "updated_at": "2026-03-26T09:32:39Z"
+    "title": "Fix failing `XCLIPModelIntegrationTest`",
+    "updated_at": "2026-04-13T11:05:45Z"
   },
   {
-    "additions": 325,
+    "additions": 1,
     "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - Adds a new checker in `make chek-repo` that will `import transformers` and count the number of imported module. - Lazy import of torch when doing `import transformers` The change will reduce the import time from ~\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? `.gitignore` was not updated when `mlinter` was refactored",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45013",
-    "created_at": "2026-03-26T07:47:23Z",
-    "deletions": 49,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45052",
+    "created_at": "2026-03-27T10:16:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45013/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45013",
+    "files_url": "https://github.com/huggingface/transformers/pull/45052/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45052",
     "labels": [],
     "merged": true,
-    "number": 45013,
-    "review_comments_count": 3,
+    "number": 45052,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "feature: added import complexity checker",
-    "updated_at": "2026-03-31T07:01:08Z"
+    "title": "chore: Fix mlinter cache location",
+    "updated_at": "2026-03-27T10:26:38Z"
   },
   {
-    "additions": 155,
-    "author": "tarekziade",
+    "additions": 2,
+    "author": "albertvillanova",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Cache to speed up ast walks, and ast tweak",
-    "changed_files": 2,
+    "body_excerpt": "Fix NaN weights on non-rank-0 FSDP processes by using `zeros_like` instead of `empty_like` in `_move_missing_keys_from_meta_to_device` Follow-up to: - #44473 See related downstream issue in `trl` : - https://github.com/huggingface/trl/issu\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45012",
-    "created_at": "2026-03-26T07:10:45Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45050",
+    "created_at": "2026-03-27T09:19:32Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45012/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45012",
+    "files_url": "https://github.com/huggingface/transformers/pull/45050/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45050",
     "labels": [],
     "merged": true,
-    "number": 45012,
+    "number": 45050,
+    "review_comments_count": 8,
+    "state": "closed",
+    "title": "Fix NaN weights on non-rank-0 FSDP processes",
+    "updated_at": "2026-04-13T13:35:35Z"
+  },
+  {
+    "additions": 6,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45030 After the config validation, all validations are now run after config is initialized. So this config has been wrong from the beginning but we didn't com\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45049",
+    "created_at": "2026-03-27T08:46:46Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45049/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45049",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 45049,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor: added cache in check_repo",
-    "updated_at": "2026-03-30T06:44:23Z"
+    "title": "Fix when RoPE params are in kwargs",
+    "updated_at": "2026-03-27T16:28:13Z"
   },
   {
-    "additions": 64,
-    "author": "kaixuanliu",
+    "additions": 4,
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh Hi, can you help review? Thx! Current error (before this PR): ```bash _ PI0ForConditionalGenerationModelTest.test_flash_attn_2_inference_equivalence _ self = <tests.models.pi0.test_modeling_pi0.PI0ForConditionalGenerationModelTest\u2026",
+    "body_excerpt": "# What does this PR do? Fixes failing [`SmolLM3IntegrationTest:test_model_3b_long_prompt`](https://github.com/huggingface/transformers/actions/runs/23629638266/job/68826332952#step:14:216). `SmolLM3` has [`do_sample=True` by default](https\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45011",
-    "created_at": "2026-03-26T06:27:09Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45048",
+    "created_at": "2026-03-27T08:45:26Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45011/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45011",
+    "files_url": "https://github.com/huggingface/transformers/pull/45048/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45048",
     "labels": [],
     "merged": true,
-    "number": 45011,
-    "review_comments_count": 5,
+    "number": 45048,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "skip 2 invalid test cases for pi0 model",
-    "updated_at": "2026-04-10T10:41:16Z"
+    "title": "Fix failing `SmolLM3IntegrationTest`",
+    "updated_at": "2026-03-27T14:18:05Z"
   },
   {
-    "additions": 75,
-    "author": "dagecko",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Fix: CI/CD Security Vulnerabilities in GitHub Actions Hi! [Runner Guard](https://github.com/Vigilant-LLC/runner-guard), an open-source CI/CD security scanner by [Vigilant Cyber Security](https://www.vigilantdefense.com), identified secu\u2026",
-    "changed_files": 20,
+    "additions": 503,
+    "author": "Akshay404error",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "Fixes #44568 \u2014 restores add_special_tokens behavior for mDeBERTa tokenizer and non-persistent buffers in v5 ## What does this PR do? This PR fixes two v5 regressions: 1. `add_special_tokens=True` no longer added BOS/EOS tokens for the `mic\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45010",
-    "created_at": "2026-03-26T06:19:29Z",
-    "deletions": 71,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45047",
+    "created_at": "2026-03-27T08:33:01Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45010/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45010",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45047/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45047",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 45010,
+    "number": 45047,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: pin 69 unpinned action(s),extract 2 unsafe expression(s) to env vars",
-    "updated_at": "2026-03-27T22:20:57Z"
+    "title": "fix: restore add_special_tokens behavior for mDeBERTa tokenizer and n\u2026",
+    "updated_at": "2026-03-27T13:15:58Z"
   },
   {
-    "additions": 330,
+    "additions": 548,
     "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This patch improves the docstring checker implementation (redundant AST walks) and adds cache. For the AST calls, 2.3x speedup check_docstrings.py --check_all on my M1: - before : 29.3s - after: 12.6s",
+    "body_excerpt": "# What does this PR do? Investigate speedups on modular conversion",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45009",
-    "created_at": "2026-03-26T05:31:41Z",
-    "deletions": 47,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45009/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45009",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45046",
+    "created_at": "2026-03-27T08:31:58Z",
+    "deletions": 42,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45046/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45046",
     "labels": [],
-    "merged": true,
-    "number": 45009,
-    "review_comments_count": 12,
-    "state": "closed",
-    "title": "refactor: speed up docstring checker",
-    "updated_at": "2026-03-27T07:21:11Z"
+    "merged": false,
+    "number": 45046,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "investigate modular conversion speedups",
+    "updated_at": "2026-03-27T16:33:45Z"
   },
   {
-    "additions": 380,
-    "author": "Krishnachaitanyakc",
+    "additions": 5095,
+    "author": "Lidang-Jiang",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fix type annotation bugs across config classes that cause `@strict` validation errors from `huggingface_hub`. ### Bool fields mistyped as `int` (22 fixes) Fields with boolean defaults (`True`/`False`) were annotated as `int` ins\u2026",
-    "changed_files": 198,
+    "body_excerpt": "## Isolate dependencies, make PIL independant from Torchvision backend Fixes #45042 PR #45029 added `@requires(backends=(\"vision\", \"torch\", \"torchvision\"))` to 67 PIL backend `image_processing_pil_*.py` files. This causes PIL backend class\u2026",
+    "changed_files": 188,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45007",
-    "created_at": "2026-03-25T23:12:53Z",
-    "deletions": 380,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45045",
+    "created_at": "2026-03-27T08:19:19Z",
+    "deletions": 1270,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45007/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45007",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45045/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45045",
+    "labels": [
+      "for patch"
+    ],
     "merged": true,
-    "number": 45007,
-    "review_comments_count": 3,
+    "number": 45045,
+    "review_comments_count": 24,
     "state": "closed",
-    "title": "fix: correct type annotations across config classes for @strict validation",
-    "updated_at": "2026-04-01T17:50:09Z"
+    "title": "[Bugfix] Remove incorrect torchvision requirement from PIL backend image processors",
+    "updated_at": "2026-03-30T07:25:49Z"
   },
   {
-    "additions": 3,
-    "author": "Krishnachaitanyakc",
+    "additions": 35,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44869 Adds a bounds check in `_split_tokens_on_unicode()` in `tokenization_whisper.py` to handle trailing Unicode replacement characters (U+FFFD) at the end of decoded token streams without crashing with `IndexError`. ##\u2026",
-    "changed_files": 1,
+    "body_excerpt": "Fix issue in https://github.com/huggingface/transformers/issues/44792. @zucchini-nlp @ydshieh pls help review, thx!",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45006",
-    "created_at": "2026-03-25T23:03:00Z",
-    "deletions": 1,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45044",
+    "created_at": "2026-03-27T07:50:21Z",
+    "deletions": 26,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45006/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45006",
+    "files_url": "https://github.com/huggingface/transformers/pull/45044/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45044",
     "labels": [],
-    "merged": false,
-    "number": 45006,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "fix: prevent IndexError in Whisper timestamp decode on trailing replacement char",
-    "updated_at": "2026-04-06T01:03:09Z"
+    "merged": true,
+    "number": 45044,
+    "review_comments_count": 11,
+    "state": "closed",
+    "title": "fix bug for janus model image generation",
+    "updated_at": "2026-04-02T02:46:14Z"
   },
   {
-    "additions": 10,
-    "author": "harshaljanjani",
+    "additions": 31,
+    "author": "Lidang-Jiang",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR (grouped them together since they share related root causes OR the code changes were extremely minimal and didn't warrant separate PRs): \u2192 **Phi-3**\u2026",
+    "body_excerpt": "## What does this PR do? Fixes #45003. `_can_set_attn_implementation` and `_can_set_experts_implementation` in `modeling_utils.py` crash with `KeyError` when `cls.__module__` is absent from `sys.modules`. This happens in real-world scenari\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45004",
-    "created_at": "2026-03-25T19:58:57Z",
-    "deletions": 6,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45043",
+    "created_at": "2026-03-27T06:07:20Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45004/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45004",
-    "labels": [],
-    "merged": true,
-    "number": 45004,
-    "review_comments_count": 3,
+    "files_url": "https://github.com/huggingface/transformers/pull/45043/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45043",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 45043,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(testing): Fix Parakeet, Evolla, Pi0, and Phi-3 test failures on main CI",
-    "updated_at": "2026-03-27T15:06:52Z"
+    "title": "[Bugfix] Use sys.modules.get() to avoid KeyError in modeling_utils",
+    "updated_at": "2026-03-27T13:02:19Z"
   },
   {
-    "additions": 1,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "`None` is a valid value that can be used to disable chunked attention in `DynamicCache` and Flex Attention. hf.co/morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct is an example of a checkpoint which does this.",
-    "changed_files": 1,
+    "additions": 677,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR addresses the performance issues observed with nn.Conv3d across different PyTorch/cuDNN, such as https://github.com/vllm-project/vllm/pull/27418, https://mp.weixin.qq.com/s/hKRIpB561EdrMY8cbg1hEw. We replace\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45002",
-    "created_at": "2026-03-25T17:40:14Z",
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45041",
+    "created_at": "2026-03-27T03:50:54Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45002/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45002",
+    "files_url": "https://github.com/huggingface/transformers/pull/45041/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45041",
     "labels": [],
     "merged": true,
-    "number": 45002,
-    "review_comments_count": 0,
+    "number": 45041,
+    "review_comments_count": 61,
     "state": "closed",
-    "title": "Fix type hint for `attention_chunk_size` in `Llama4TextConfig`",
-    "updated_at": "2026-03-25T20:42:11Z"
+    "title": "[inference_fusion] convert conv3d patch embed to linear",
+    "updated_at": "2026-04-13T15:30:39Z"
   },
   {
-    "additions": 20,
-    "author": "Sai-Suraj-27",
+    "additions": 13,
+    "author": "sywangyi",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? For [torch>=2.10.0](https://docs.pytorch.org/docs/2.10/generated/torch.nn.functional.grouped_mm.html#torch-nn-functional-grouped-mm), the minimum CUDA compute capability requirement for `torch.nn.functional.grouped_\u2026",
-    "changed_files": 1,
+    "body_excerpt": "- vision models: @yonigozlan @molbap - CIs: @ydshieh when running tests/models/video_llama_3/test_modeling_video_llama_3.py::VideoLlama3IntegrationTest all fail cause by lm_head.weight is missing.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45001",
-    "created_at": "2026-03-25T17:00:28Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45040",
+    "created_at": "2026-03-27T02:56:58Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45001/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45001",
+    "files_url": "https://github.com/huggingface/transformers/pull/45040/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45040",
     "labels": [],
-    "merged": true,
-    "number": 45001,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Add cuda compatibility check for using `grouped_mm`",
-    "updated_at": "2026-04-10T07:54:45Z"
+    "merged": false,
+    "number": 45040,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Llama3 video fix",
+    "updated_at": "2026-03-27T10:55:26Z"
   },
   {
-    "additions": 22,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, after https://github.com/huggingface/transformers/pull/44976 users will be seeing a `missing_weights - lm_head not found` error even though the model doesn't use an lm head On the way also deleted unne\u2026",
-    "changed_files": 8,
+    "additions": 26,
+    "author": "Lidang-Jiang",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #44716 - Propagate `interpolate_pos_encoding` parameter through `PixioModel.forward()`, `PixioBackbone.forward()`, and `PixioEmbeddings.forward()` down to `PixioPatchEmbeddings.forward()` - Follows the same pattern used by\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45000",
-    "created_at": "2026-03-25T16:28:55Z",
-    "deletions": 109,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45039",
+    "created_at": "2026-03-27T02:54:31Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45000/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45000",
+    "files_url": "https://github.com/huggingface/transformers/pull/45039/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45039",
     "labels": [],
-    "merged": true,
-    "number": 45000,
+    "merged": false,
+    "number": 45039,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Embedding VLMs don't need a head",
-    "updated_at": "2026-03-27T09:57:18Z"
+    "title": "[Bugfix] Propagate interpolate_pos_encoding through Pixio model",
+    "updated_at": "2026-03-27T12:57:30Z"
   },
   {
-    "additions": 2450,
-    "author": "itazap",
+    "additions": 1,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written by Claude Opus 4.6 guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test pl\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44999",
-    "created_at": "2026-03-25T16:21:37Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45038",
+    "created_at": "2026-03-27T00:18:35Z",
     "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44999/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44999",
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45038/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45038",
     "labels": [],
-    "merged": false,
-    "number": 44999,
+    "merged": true,
+    "number": 45038,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add sarvam model",
-    "updated_at": "2026-03-30T23:21:49Z"
+    "title": "style was missing sorry @ydshieh :)",
+    "updated_at": "2026-03-27T00:28:23Z"
   },
   {
-    "additions": 1179,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written by Claude Opus 4.6 guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test pl\u2026",
-    "changed_files": 4,
+    "additions": 1,
+    "author": "asuryateja",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "\u2026_interface docs The `custom_attention` function definition in the attention_interface documentation was missing a colon at the end of the return type annotation, making it invalid Python syntax. # What does this PR do? <!-- Congratulation\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44997",
-    "created_at": "2026-03-25T14:23:13Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44997/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44997",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45037",
+    "created_at": "2026-03-26T23:48:04Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45037/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45037",
     "labels": [],
     "merged": false,
-    "number": 44997,
+    "number": 45037,
     "review_comments_count": 0,
+    "state": "open",
+    "title": "add missing colon in custom_attention function signature in attention\u2026",
+    "updated_at": "2026-03-27T00:17:55Z"
+  },
+  {
+    "additions": 15,
+    "author": "matdou",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #45030 Configs like `tiny-random/glm-4v` store `rope_theta` at the top level of `config.json` alongside a `rope_scaling` dict (legacy format). For config classes that don't declare `rope_parameters` as a datac\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45036",
+    "created_at": "2026-03-26T23:00:35Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45036/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45036",
+    "labels": [],
+    "merged": false,
+    "number": 45036,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add sarvam model",
-    "updated_at": "2026-03-25T14:35:45Z"
+    "title": "[fix] BC for legacy configs with top-level rope_theta when rope_parameters is set via rope_scaling",
+    "updated_at": "2026-03-28T23:47:33Z"
   },
   {
-    "additions": 286,
-    "author": "3outeille",
-    "author_association": "MEMBER",
-    "body_excerpt": "- Introduce `DistributedConfig` - `DistributedConfig(tp_size=2, fsdp_size=2) # plans default to \"auto\"` replaces passing separate `tp_plan, tp_size, fsdp_plan kwargs`. Sizes auto-fill (specify one, the other defaults to 1). Plans default t\u2026",
-    "changed_files": 10,
+    "additions": 1,
+    "author": "asuryateja",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "The conversion operations table was missing PermuteForRope. Added it with its reverse (itself), consistent with how other operations are documented. PermuteForRope is self-inverse applying it twice returns the original tensor layout. # Wha\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44996",
-    "created_at": "2026-03-25T14:20:25Z",
-    "deletions": 283,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45035",
+    "created_at": "2026-03-26T21:05:17Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44996/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44996",
+    "files_url": "https://github.com/huggingface/transformers/pull/45035/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45035",
     "labels": [],
     "merged": true,
-    "number": 44996,
+    "number": 45035,
     "review_comments_count": 0,
     "state": "closed",
-    "title": " from_pretrained distributed refactor (FSDP2 + TP)",
-    "updated_at": "2026-03-26T15:32:27Z"
+    "title": "docs: add PermuteForRope to conversion operations reverse table",
+    "updated_at": "2026-03-26T22:09:53Z"
   },
   {
-    "additions": 3639,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": null,
-    "changed_files": 8,
+    "additions": 113,
+    "author": "sdharani91",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This is a follow up to https://github.com/huggingface/transformers/pull/44867 This PR fixes Qwen3.5 padding-free packed inputs on the linear-attention fast path by consuming collator-provided packed metadata. The li\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44994",
-    "created_at": "2026-03-25T14:02:50Z",
-    "deletions": 242,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45034",
+    "created_at": "2026-03-26T20:52:51Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44994/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44994",
+    "files_url": "https://github.com/huggingface/transformers/pull/45034/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45034",
     "labels": [],
     "merged": false,
-    "number": 44994,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add sarvam model",
-    "updated_at": "2026-03-25T14:04:38Z"
+    "number": 45034,
+    "review_comments_count": 39,
+    "state": "open",
+    "title": "Pass packed boundary metadata to Qwen3.5 linear-attention fast kernels from data collator",
+    "updated_at": "2026-04-13T14:58:59Z"
   },
   {
-    "additions": 583,
-    "author": "tarekziade",
+    "additions": 3,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? `make check-repo` can be quite slow, this patch adds file-level cache to speed up checks. We get up to a 27x speedup - cold cache : 46s - warm cache : 1.6s",
-    "changed_files": 20,
+    "body_excerpt": "In https://github.com/huggingface/transformers/pull/43514, `BaseImageProcessorFast` became `BaseImageProcessor` and `_further_process_kwargs` was renamed to `_standardize_kwargs` This PR adds some BC for the old name of this method.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44992",
-    "created_at": "2026-03-25T11:40:46Z",
-    "deletions": 48,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45033",
+    "created_at": "2026-03-26T20:36:40Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44992/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44992",
+    "files_url": "https://github.com/huggingface/transformers/pull/45033/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45033",
     "labels": [],
     "merged": true,
-    "number": 44992,
-    "review_comments_count": 4,
+    "number": 45033,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "refactoring: speedup static checks with disk cache",
-    "updated_at": "2026-03-31T12:34:20Z"
+    "title": "Add BC for `_further_process_kwargs`",
+    "updated_at": "2026-03-26T21:01:32Z"
   },
   {
-    "additions": 8,
-    "author": "ArthurZucker",
+    "additions": 163,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - BC for check model inputs",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Use multi runners to get new failing tests in a CI run.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44990",
-    "created_at": "2026-03-25T10:26:20Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45032",
+    "created_at": "2026-03-26T18:48:18Z",
+    "deletions": 67,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44990/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44990",
+    "files_url": "https://github.com/huggingface/transformers/pull/45032/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45032",
     "labels": [],
     "merged": true,
-    "number": 44990,
+    "number": 45032,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "More small vllm fixes",
-    "updated_at": "2026-03-25T13:05:44Z"
+    "title": "Use multi runners to check new failing tests in a CI run",
+    "updated_at": "2026-03-26T18:59:08Z"
   },
   {
-    "additions": 1,
-    "author": "3outeille",
+    "additions": 6,
+    "author": "tomaarsen",
     "author_association": "MEMBER",
-    "body_excerpt": "- Steps breakdown: - FSDP + TP: - https://github.com/huggingface/transformers/pull/44083 - [Request](https://github.com/huggingface/transformers/pull/44083#pullrequestreview-3975401342) to use our loading method https://github.com/huggingf\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Follow-up for https://github.com/huggingface/transformers/pull/44931, which added weight tying for Camembert. Only the CamembertForMaskedLM class had the right _tied_weights_keys, the CamembertForCausalLM had the in\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44989",
-    "created_at": "2026-03-25T09:10:02Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44989/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44989",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45031",
+    "created_at": "2026-03-26T18:28:54Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45031/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45031",
     "labels": [],
-    "merged": false,
-    "number": 44989,
+    "merged": true,
+    "number": 45031,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "\ud83d\udea8 Distributed training API",
-    "updated_at": "2026-04-13T09:45:16Z"
+    "state": "closed",
+    "title": "[`fix`] Use the correct _tied_weights_keys for CamembertForCausalLM",
+    "updated_at": "2026-03-26T18:57:29Z"
   },
   {
-    "additions": 730,
-    "author": "tarekziade",
+    "additions": 478,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds Rule 15 ``` if _tied_weights_keys is present and non-empty in modeling -> Config MUST contain the tie_word_embeddings field ```",
-    "changed_files": 16,
+    "body_excerpt": "# What does this PR do? Release workflow is failing",
+    "changed_files": 101,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 17,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44988",
-    "created_at": "2026-03-25T07:08:20Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45029",
+    "created_at": "2026-03-26T18:04:37Z",
+    "deletions": 768,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44988/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44988",
+    "files_url": "https://github.com/huggingface/transformers/pull/45029/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45029",
+    "labels": [],
+    "merged": true,
+    "number": 45029,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Fix release full",
+    "updated_at": "2026-03-27T06:34:00Z"
+  },
+  {
+    "additions": 400,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "- TODO - how will dtensor works with quantization ? - how will dtensor works with kernels ? - Needs end to end test (combine `verify_all_loss` -> `training` with saving + loading back for generate ?) - double check Save FSDP + TP - do test\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45028",
+    "created_at": "2026-03-26T17:50:07Z",
+    "deletions": 1667,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45028/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45028",
     "labels": [],
     "merged": false,
-    "number": 44988,
-    "review_comments_count": 3,
+    "number": 45028,
+    "review_comments_count": 13,
     "state": "open",
-    "title": "typing: rule 15 - checks for tie_word_embeddings presence",
-    "updated_at": "2026-04-13T08:12:24Z"
+    "title": "TP refactor for FSDP + TP integration",
+    "updated_at": "2026-04-14T14:25:27Z"
   },
   {
-    "additions": 0,
-    "author": "Krishnachaitanyakc",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44855 On Python 3.13, placing a `# Copied from` comment between `@torch.jit.script` and the function definition causes an `IndentationError`. This happens because `torch.jit.script` calls `inspect.getsource()` followed by\u2026",
-    "changed_files": 2,
+    "additions": 714,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "# Summary This PR adds per-request logits processors and overalls the way CB handles logits processors. It introduces batched logits processing with per-request parameters for continuous batching, enabling each request in a batch to use di\u2026",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44986",
-    "created_at": "2026-03-25T03:18:31Z",
-    "deletions": 6,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45026",
+    "created_at": "2026-03-26T17:00:07Z",
+    "deletions": 202,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44986/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44986",
+    "files_url": "https://github.com/huggingface/transformers/pull/45026/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45026",
     "labels": [],
     "merged": true,
-    "number": 44986,
-    "review_comments_count": 0,
+    "number": 45026,
+    "review_comments_count": 32,
     "state": "closed",
-    "title": "fix: remove Copied from comments between @torch.jit.script and def for Python 3.13 compat",
-    "updated_at": "2026-03-25T13:39:54Z"
+    "title": "[CB] Add per-request logits processors",
+    "updated_at": "2026-04-03T16:44:11Z"
   },
   {
     "additions": 2,
-    "author": "Krishnachaitanyakc",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44913 When creating a `GPTNeoXConfig` (or `GPTNeoXJapaneseConfig`) with a non-default `rotary_pct`, the value is lost after a `save_pretrained` / `from_pretrained` round-trip. This happens because `convert_rope_params_to_\u2026",
-    "changed_files": 2,
+    "author": "layla1824",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "This PR adds a clarification comment regarding the behavior of rotary_pct. Currently, rotary_pct may reset to its default value (0.25) after reload due to the use of kwargs.pop. This note helps developers better understand this behavior.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44985",
-    "created_at": "2026-03-25T02:15:04Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45025",
+    "created_at": "2026-03-26T16:48:53Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44985/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44985",
+    "files_url": "https://github.com/huggingface/transformers/pull/45025/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45025",
     "labels": [],
-    "merged": true,
-    "number": 44985,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 45025,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: preserve rotary_pct across save/load cycle in GPTNeoX configs",
-    "updated_at": "2026-03-27T09:19:46Z"
+    "title": "Update configuration_Clarify rotary_pct reset behavior in GPTNeoXConfiggpt_neox.py",
+    "updated_at": "2026-03-27T09:05:48Z"
   },
   {
-    "additions": 2,
-    "author": "Butanium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? `maybe_autocast` calls `torch.is_autocast_enabled(device_type)` which raises a `RuntimeError` when `device_type` is `\"meta\"`: ``` RuntimeError: unknown device type for autocast in get_autocast_dispatch_key_from_dev\u2026",
+    "additions": 1,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? `tf` and `flax` are long gone (unknown extras generate only warnings so this slipped through the cracks)",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44984",
-    "created_at": "2026-03-25T01:39:23Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45024",
+    "created_at": "2026-03-26T16:21:30Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44984/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44984",
+    "files_url": "https://github.com/huggingface/transformers/pull/45024/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45024",
     "labels": [],
     "merged": true,
-    "number": 44984,
+    "number": 45024,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `maybe_autocast` crashing on meta device tensors",
-    "updated_at": "2026-03-25T17:45:03Z"
+    "title": "chore: remove old extras",
+    "updated_at": "2026-03-27T11:16:03Z"
   },
   {
-    "additions": 26,
-    "author": "Hyungkeun-Park-Nota",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes `save_pretrained()` for models loaded with `dequantize=True`. `save_pretrained` calls `reverse_op` on all weight conversion operations from loading. Dequantize ops (`Mxfp4Dequantize`, `Fp8Dequantize`, `MetalD\u2026",
-    "changed_files": 4,
+    "additions": 2529,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? **Integration notes:** For now, this integration does not load mel filters from the checkpoint. The original model was trained backpropagating gradients in it, but we saw previously (with parakeet-ctc) that this doe\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44983",
-    "created_at": "2026-03-25T01:19:59Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45023",
+    "created_at": "2026-03-26T15:58:08Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44983/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44983",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45023/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45023",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
     "merged": true,
-    "number": 44983,
-    "review_comments_count": 6,
+    "number": 45023,
+    "review_comments_count": 13,
     "state": "closed",
-    "title": "fix: add identity reverse_op to dequantize ops for save_pretrained",
-    "updated_at": "2026-03-27T17:21:18Z"
+    "title": "Add cohere asr",
+    "updated_at": "2026-03-26T22:48:16Z"
   },
   {
-    "additions": 108,
-    "author": "AkshajKashyap",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #43039 ## What does this PR do? When `prediction_loss_only=True` during evaluation and `use_liger_kernel=True`, `Trainer.prediction_step` now passes `skip_logits=True` to the model forward if the forward signature supports it and lab\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "popotest",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44981",
-    "created_at": "2026-03-25T00:38:02Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45022",
+    "created_at": "2026-03-26T15:33:19Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44981/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44981",
+    "files_url": "https://github.com/huggingface/transformers/pull/45022/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45022",
     "labels": [],
     "merged": false,
-    "number": 44981,
-    "review_comments_count": 6,
-    "state": "open",
-    "title": "Trainer: set skip_logits for loss-only eval when liger enabled",
-    "updated_at": "2026-04-09T15:31:50Z"
+    "number": 45022,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update _config.py",
+    "updated_at": "2026-03-27T13:17:36Z"
   },
   {
-    "additions": 6,
-    "author": "kallewoof",
+    "additions": 2,
+    "author": "hf-security-analysis[bot]",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Pre-patch unnecessarily breaks merging a LoRA adapter with a model using CUDA_VISIBLE_DEVICES= e.g. when VRAM is insufficient. It also breaks non-cuda machine operations (such as merging). # What does this PR do? This PR un-breaks `CUDA_VI\u2026",
-    "changed_files": 6,
+    "body_excerpt": "Update `.github/workflows/anti-slop.yml` workflow configuration. cc @tarekziade Closes huggingface/tracking-issues#30",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44980",
-    "created_at": "2026-03-24T23:50:07Z",
-    "deletions": 6,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45021",
+    "created_at": "2026-03-26T13:40:18Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44980/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44980",
+    "files_url": "https://github.com/huggingface/transformers/pull/45021/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45021",
     "labels": [],
     "merged": false,
-    "number": 44980,
+    "number": 45021,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "bug-fix: do not assume torch.cuda is available when setting up norm values, even if flash linear attention is available",
-    "updated_at": "2026-03-27T13:25:18Z"
+    "title": "chore: update anti-slop.yml",
+    "updated_at": "2026-03-26T13:53:57Z"
   },
   {
-    "additions": 492,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Introduces `src/transformers/module_fusion.py`, a utility for fusing adjacent submodules in a model into a single FusedModule that executes them as a chain in one forward pass. The key components are: - `RegistryCol\u2026",
-    "changed_files": 2,
+    "additions": 21,
+    "author": "javierdejesusda",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44877 Loading `ibm-granite/granite-4.0-1b-speech` fails with `StrictDataclassFieldValidationError` because its config.json stores `embedding_multiplier` and `logits_scaling` as integers (e.g. `12`, `8`), but\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44979",
-    "created_at": "2026-03-24T22:33:31Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45019",
+    "created_at": "2026-03-26T11:19:19Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44979/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44979",
+    "files_url": "https://github.com/huggingface/transformers/pull/45019/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45019",
     "labels": [],
-    "merged": false,
-    "number": 44979,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Module Fusion API",
-    "updated_at": "2026-04-13T09:13:07Z"
-  },
-  {
-    "additions": 4,
-    "author": "cjkindel",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? `_can_set_attn_implementation` and `_can_set_experts_implementation` both do a direct subscript lookup into `sys.modules`: ```python class_module = sys.modules[cls.__module__] ``` If the module is not registered und\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44978",
-    "created_at": "2026-03-24T21:01:11Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44978/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44978",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44978,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 45019,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "fix: handle absent sys.modules entry in modeling_utils",
-    "updated_at": "2026-03-26T12:25:31Z"
+    "title": "Fix GraniteConfig type hints to accept int for multiplier fields",
+    "updated_at": "2026-03-27T09:30:17Z"
   },
   {
-    "additions": 2,
-    "author": "hmellor",
+    "additions": 1802,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "- Adds a type hint to `ModernVBertForMaskedLM.__init__` - Removes `tie_word_embeddings` from `Qwen2VLTextConfig` (and therefore also `Qwen2_5_VLTextConfig`) because it's not valid for these models - Remove hack from `ColQwen2Config` (and t\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? I thought a lot about how to make this dynamic for devs (who add models etc.) and keep static for users. The whole thing of automatically inferring config and model-type based on code runs with AST, so we don't have\u2026",
+    "changed_files": 65,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44976",
-    "created_at": "2026-03-24T19:26:33Z",
-    "deletions": 10,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45018",
+    "created_at": "2026-03-26T11:18:13Z",
+    "deletions": 1435,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44976/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44976",
+    "files_url": "https://github.com/huggingface/transformers/pull/45018/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45018",
     "labels": [],
-    "merged": true,
-    "number": 44976,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "Fix tie_word_embedding issues with `Qwen2VL`",
-    "updated_at": "2026-03-24T20:55:15Z"
+    "merged": false,
+    "number": 45018,
+    "review_comments_count": 7,
+    "state": "open",
+    "title": "Dynamic auto mapping (PoC)",
+    "updated_at": "2026-04-13T18:16:56Z"
   },
   {
-    "additions": 6971,
-    "author": "philippguevorguian",
-    "author_association": "NONE",
-    "body_excerpt": null,
-    "changed_files": 20,
+    "additions": 571,
+    "author": "JaredforReal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? ### Get the rope operation right Before: NeoX split-half style After: GPT-J/interleaved style(`interleaved=True` same as `is_neox_style=Flase`) the right one ### Get rid of `F.relu` Reason: - `F.relu` works with `ac\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44975",
-    "created_at": "2026-03-24T17:12:31Z",
-    "deletions": 2,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45017",
+    "created_at": "2026-03-26T09:21:10Z",
+    "deletions": 49,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44975/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44975",
+    "files_url": "https://github.com/huggingface/transformers/pull/45017/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45017",
     "labels": [],
     "merged": false,
-    "number": 44975,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: rebase main; clean config reads, ImageProcessor backend, misc cleanup",
-    "updated_at": "2026-03-24T17:13:42Z"
+    "number": 45017,
+    "review_comments_count": 5,
+    "state": "open",
+    "title": "[WIP][Fix] GLM 5 set `apply_rotary_pos_emb` to `is_neox_style=False` && remove `F.relu()`",
+    "updated_at": "2026-04-14T09:36:01Z"
   },
   {
-    "additions": 1084,
-    "author": "3outeille",
-    "author_association": "MEMBER",
-    "body_excerpt": "TODO: - Saving seems to take a bit of time tho. Need investigation - Need to check if it works in 1D (FSDP or TP)and 2D (FSDP + TP). Running the script from https://github.com/huggingface/transformers/pull/44996 ``` (env_pr-44974-fsdp-core\u2026",
-    "changed_files": 12,
+    "additions": 64,
+    "author": "inisis",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44974",
-    "created_at": "2026-03-24T16:13:25Z",
-    "deletions": 332,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45016",
+    "created_at": "2026-03-26T09:09:41Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44974/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44974",
+    "files_url": "https://github.com/huggingface/transformers/pull/45016/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45016",
     "labels": [],
     "merged": false,
-    "number": 44974,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Refactor core_model_loading to support FSDP shard-on-read loading",
-    "updated_at": "2026-04-07T11:39:21Z"
+    "number": 45016,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: glm5 inference bug",
+    "updated_at": "2026-03-26T12:10:21Z"
   },
   {
-    "additions": 22,
-    "author": "andylizf",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds `.item()` to `max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max()` in all vision attention modules that pass this value to `flash_attn_varlen_func`. ### Context On **released versions** (e.g. 4.52.4), using\u2026",
-    "changed_files": 19,
+    "additions": 55,
+    "author": "pnehete23",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes a `KeyError` in `_can_set_attn_implementation` and `_can_set_experts_implementation` when a model's module is absent from `sys.modules`. Fixes #45003 ## Root Cause Both `_can_set_attn_implementation` (line 19\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44973",
-    "created_at": "2026-03-24T15:42:32Z",
-    "deletions": 22,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45015",
+    "created_at": "2026-03-26T08:52:10Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44973/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44973",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45015/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45015",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44973,
+    "number": 45015,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix max_seqlen type in vision attention for torch.compile + FA2",
-    "updated_at": "2026-03-25T14:12:50Z"
+    "state": "closed",
+    "title": "fix: guard sys.modules access in _can_set_attn/experts_implementation",
+    "updated_at": "2026-03-26T12:15:50Z"
   },
   {
-    "additions": 17,
-    "author": "Abdennacer-Badaoui",
+    "additions": 16,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title. Updating Gemma3/Gemma3n expectations.",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? #30674 refactors the way we obtain CircleCI test files to run for each job. It always puts [\"tests\"] for `tests_hub`, so each commit of each PR will run it, no matter if there is any change to codebase. Let's reduce\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44972",
-    "created_at": "2026-03-24T15:11:50Z",
-    "deletions": 12,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45014",
+    "created_at": "2026-03-26T08:52:04Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44972/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44972",
+    "files_url": "https://github.com/huggingface/transformers/pull/45014/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45014",
     "labels": [],
     "merged": true,
-    "number": 44972,
-    "review_comments_count": 10,
+    "number": 45014,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "[AMD CI] Gemma3/Gemma3n Expectations",
-    "updated_at": "2026-03-24T16:30:03Z"
+    "title": "Don't run `tests_hub` if no tests found",
+    "updated_at": "2026-03-26T09:32:39Z"
   },
   {
-    "additions": 0,
-    "author": "ArthurZucker",
+    "additions": 325,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Removed the tokenizer_class attr was never there to begin with, and kwargs are now supported. This was failing some test on vllm ci. Fixes https://buildkite.com/vllm/ci/builds/57601/steps/canvas?sid=019d1aec-aa5a-41\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? - Adds a new checker in `make chek-repo` that will `import transformers` and count the number of imported module. - Lazy import of torch when doing `import transformers` The change will reduce the import time from ~\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44971",
-    "created_at": "2026-03-24T14:59:36Z",
-    "deletions": 11,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45013",
+    "created_at": "2026-03-26T07:47:23Z",
+    "deletions": 49,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44971/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44971",
+    "files_url": "https://github.com/huggingface/transformers/pull/45013/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45013",
     "labels": [],
     "merged": true,
-    "number": 44971,
-    "review_comments_count": 1,
+    "number": 45013,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "[ `vllm x v5`] nit",
-    "updated_at": "2026-03-24T17:40:05Z"
+    "title": "feature: added import complexity checker",
+    "updated_at": "2026-03-31T07:01:08Z"
   },
   {
-    "additions": 20,
-    "author": "IlyasMoutawwakil",
+    "additions": 155,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? Cache to speed up ast walks, and ast tweak",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44970",
-    "created_at": "2026-03-24T13:49:21Z",
-    "deletions": 76,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45012",
+    "created_at": "2026-03-26T07:10:45Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44970/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44970",
+    "files_url": "https://github.com/huggingface/transformers/pull/45012/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45012",
     "labels": [],
     "merged": true,
-    "number": 44970,
-    "review_comments_count": 1,
+    "number": 45012,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix CPU 16 bytes alignment issue using equivalent fallback",
-    "updated_at": "2026-03-25T09:01:03Z"
+    "title": "refactor: added cache in check_repo",
+    "updated_at": "2026-03-30T06:44:23Z"
   },
   {
-    "additions": 4,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Extends the CI so we can use Make and read toml files",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
+    "additions": 64,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh Hi, can you help review? Thx! Current error (before this PR): ```bash _ PI0ForConditionalGenerationModelTest.test_flash_attn_2_inference_equivalence _ self = <tests.models.pi0.test_modeling_pi0.PI0ForConditionalGenerationModelTest\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44968",
-    "created_at": "2026-03-24T11:43:24Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45011",
+    "created_at": "2026-03-26T06:27:09Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44968/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44968",
+    "files_url": "https://github.com/huggingface/transformers/pull/45011/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45011",
     "labels": [],
-    "merged": false,
-    "number": 44968,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 45011,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Added Make to the docker and `tomli` to `.[quality]`",
-    "updated_at": "2026-03-24T15:06:29Z"
+    "title": "skip 2 invalid test cases for pi0 model",
+    "updated_at": "2026-04-10T10:41:16Z"
   },
   {
-    "additions": 87,
-    "author": "Qubitium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix: FA kernel launches currently are not thread-safe (nogil) in multi-gpu env. This simple patch fixes the issue. ```py # Set the correct CUDA context before launching the FlashAttention kernel. with torch.cuda.dev\u2026",
-    "changed_files": 2,
+    "additions": 75,
+    "author": "dagecko",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Fix: CI/CD Security Vulnerabilities in GitHub Actions Hi! [Runner Guard](https://github.com/Vigilant-LLC/runner-guard), an open-source CI/CD security scanner by [Vigilant Cyber Security](https://www.vigilantdefense.com), identified secu\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44967",
-    "created_at": "2026-03-24T11:33:45Z",
-    "deletions": 84,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44967/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44967",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45010",
+    "created_at": "2026-03-26T06:19:29Z",
+    "deletions": 71,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45010/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45010",
     "labels": [],
     "merged": false,
-    "number": 44967,
+    "number": 45010,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[WIP] Fix FA kernel launch needs correct cuda device ctx in multi-gpu env",
-    "updated_at": "2026-04-08T15:10:47Z"
+    "title": "fix: pin 69 unpinned action(s),extract 2 unsafe expression(s) to env vars",
+    "updated_at": "2026-03-27T22:20:57Z"
   },
   {
-    "additions": 8,
-    "author": "pramilajangid",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44964 ## Summary This PR restores backward compatibility for `CommonKwargs` in `transformers.processing_utils`, which is still referenced by some remote processor implementations. ## Problem After the typed-dict cleanup (commit `533\u2026",
-    "changed_files": 1,
+    "additions": 330,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This patch improves the docstring checker implementation (redundant AST walks) and adds cache. For the AST calls, 2.3x speedup check_docstrings.py --check_all on my M1: - before : 29.3s - after: 12.6s",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44966",
-    "created_at": "2026-03-24T11:06:57Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45009",
+    "created_at": "2026-03-26T05:31:41Z",
+    "deletions": 47,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44966/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44966",
+    "files_url": "https://github.com/huggingface/transformers/pull/45009/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45009",
     "labels": [],
-    "merged": false,
-    "number": 44966,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 45009,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "Fix backward compatibility for CommonKwargs in processing_utils (brea\u2026",
-    "updated_at": "2026-03-24T12:48:44Z"
+    "title": "refactor: speed up docstring checker",
+    "updated_at": "2026-03-27T07:21:11Z"
   },
   {
-    "additions": 37,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 380,
+    "author": "Krishnachaitanyakc",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fix type annotation bugs across config classes that cause `@strict` validation errors from `huggingface_hub`. ### Bool fields mistyped as `int` (22 fixes) Fields with boolean defaults (`True`/`False`) were annotated as `int` ins\u2026",
+    "changed_files": 198,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44965",
-    "created_at": "2026-03-24T10:59:31Z",
-    "deletions": 32,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45007",
+    "created_at": "2026-03-25T23:12:53Z",
+    "deletions": 380,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44965/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44965",
+    "files_url": "https://github.com/huggingface/transformers/pull/45007/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45007",
     "labels": [],
-    "merged": false,
-    "number": 44965,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "try",
-    "updated_at": "2026-03-24T11:19:27Z"
+    "merged": true,
+    "number": 45007,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "fix: correct type annotations across config classes for @strict validation",
+    "updated_at": "2026-04-01T17:50:09Z"
   },
   {
     "additions": 3,
-    "author": "josh-kean",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes an import in src/transformers/video_processing_utils.py that was causing the main build to fail Fixes # 44933 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs\u2026",
+    "author": "Krishnachaitanyakc",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #44869 Adds a bounds check in `_split_tokens_on_unicode()` in `tokenization_whisper.py` to handle trailing Unicode replacement characters (U+FFFD) at the end of decoded token streams without crashing with `IndexError`. ##\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44958",
-    "created_at": "2026-03-23T20:07:09Z",
-    "deletions": 2,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45006",
+    "created_at": "2026-03-25T23:03:00Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44958/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44958",
+    "files_url": "https://github.com/huggingface/transformers/pull/45006/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45006",
     "labels": [],
     "merged": false,
-    "number": 44958,
-    "review_comments_count": 1,
+    "number": 45006,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "fixed import error with PILImageResampling",
-    "updated_at": "2026-03-24T13:53:00Z"
+    "title": "fix: prevent IndexError in Whisper timestamp decode on trailing replacement char",
+    "updated_at": "2026-04-06T01:03:09Z"
   },
   {
-    "additions": 1272,
-    "author": "bigshanedogg",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds native Transformers support for **[HyperCLOVA X SEED Think 14B](https://huggingface.co/naver-hyperclovax/HyperCLOVAX-SEED-Think-14B)**, a 14.74B-parameter Korean reasoning LLM developed by NAVER Cloud. - relate\u2026",
-    "changed_files": 12,
+    "additions": 10,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR (grouped them together since they share related root causes OR the code changes were extremely minimal and didn't warrant separate PRs): \u2192 **Phi-3**\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44956",
-    "created_at": "2026-03-23T19:34:30Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45004",
+    "created_at": "2026-03-25T19:58:57Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44956/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44956",
+    "files_url": "https://github.com/huggingface/transformers/pull/45004/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45004",
     "labels": [],
-    "merged": false,
-    "number": 44956,
-    "review_comments_count": 24,
-    "state": "open",
-    "title": "Add HyperCLOVAX SEED Think 14B",
-    "updated_at": "2026-04-12T06:17:53Z"
+    "merged": true,
+    "number": 45004,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "fix(testing): Fix Parakeet, Evolla, Pi0, and Phi-3 test failures on main CI",
+    "updated_at": "2026-03-27T15:06:52Z"
   },
   {
-    "additions": 0,
-    "author": "stevhliu",
+    "additions": 1,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "removes outdated qa pipeline reference",
+    "body_excerpt": "`None` is a valid value that can be used to disable chunked attention in `DynamicCache` and Flex Attention. hf.co/morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct is an example of a checkpoint which does this.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44954",
-    "created_at": "2026-03-23T17:20:37Z",
-    "deletions": 5,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45002",
+    "created_at": "2026-03-25T17:40:14Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44954/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44954",
+    "files_url": "https://github.com/huggingface/transformers/pull/45002/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45002",
     "labels": [],
     "merged": true,
-    "number": 44954,
+    "number": 45002,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] pipeline cleanup",
-    "updated_at": "2026-04-08T17:03:36Z"
+    "title": "Fix type hint for `attention_chunk_size` in `Llama4TextConfig`",
+    "updated_at": "2026-03-25T20:42:11Z"
   },
   {
-    "additions": 861,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Decouples `kwargs` manipulation from hub's strict decorator, and ensures that all subclasses of a `PreTrainedConfig` accept any kwargs which is what we supported prev. Not all remote code has `@strict` or has an `__\u2026",
-    "changed_files": 536,
+    "additions": 20,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? For [torch>=2.10.0](https://docs.pytorch.org/docs/2.10/generated/torch.nn.functional.grouped_mm.html#torch-nn-functional-grouped-mm), the minimum CUDA compute capability requirement for `torch.nn.functional.grouped_\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44953",
-    "created_at": "2026-03-23T17:13:39Z",
-    "deletions": 824,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45001",
+    "created_at": "2026-03-25T17:00:28Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44953/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44953",
+    "files_url": "https://github.com/huggingface/transformers/pull/45001/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45001",
     "labels": [],
     "merged": true,
-    "number": 44953,
-    "review_comments_count": 0,
+    "number": 45001,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Config kwargs",
-    "updated_at": "2026-03-24T14:14:46Z"
+    "title": "Add cuda compatibility check for using `grouped_mm`",
+    "updated_at": "2026-04-10T07:54:45Z"
   },
   {
-    "additions": 10,
-    "author": "Jess-Co-Del",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes the non existence of output dictionary change, when parameter output_hidden_states=True is passed to models like CLIP or SigLip. This is especially pertinent for the vision model config. According to #42759 no\u2026",
-    "changed_files": 2,
+    "additions": 22,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, after https://github.com/huggingface/transformers/pull/44976 users will be seeing a `missing_weights - lm_head not found` error even though the model doesn't use an lm head On the way also deleted unne\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44952",
-    "created_at": "2026-03-23T17:02:50Z",
-    "deletions": 2,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45000",
+    "created_at": "2026-03-25T16:28:55Z",
+    "deletions": 109,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44952/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44952",
+    "files_url": "https://github.com/huggingface/transformers/pull/45000/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45000",
     "labels": [],
-    "merged": false,
-    "number": 44952,
+    "merged": true,
+    "number": 45000,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix: Add correct return behaviour when output_hidden_states=True for CLIP and SIGLIP vision models",
-    "updated_at": "2026-03-24T11:19:35Z"
+    "state": "closed",
+    "title": "Embedding VLMs don't need a head",
+    "updated_at": "2026-03-27T09:57:18Z"
   },
   {
-    "additions": 113,
-    "author": "hemantmm",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This pull request adds routing replay functionality for mixture-of-experts (MoE) model types by giving users the option to override router probabilities while processing a forward pass through their models. <!-- Con\u2026",
+    "additions": 2450,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written by Claude Opus 4.6 guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test pl\u2026",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44951",
-    "created_at": "2026-03-23T16:29:46Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44951/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44951",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44999",
+    "created_at": "2026-03-25T16:21:37Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44999/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44999",
     "labels": [],
     "merged": false,
-    "number": 44951,
+    "number": 44999,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "feat: Add router_logits override to enable Routing Replay for MoE models",
-    "updated_at": "2026-03-26T12:36:20Z"
+    "state": "closed",
+    "title": "Add sarvam model",
+    "updated_at": "2026-03-30T23:21:49Z"
   },
   {
-    "additions": 1346,
-    "author": "Cyrilvallez",
+    "additions": 1179,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. This PR finally makes mamba layer caches first class citizen, and adds native support for them. It supports the following layers combinations: - all mamba layers - alternating attention layer/mamba\u2026",
-    "changed_files": 64,
+    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written by Claude Opus 4.6 guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test pl\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44950",
-    "created_at": "2026-03-23T16:25:13Z",
-    "deletions": 4113,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44950/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44950",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44997",
+    "created_at": "2026-03-25T14:23:13Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44997/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44997",
     "labels": [],
-    "merged": true,
-    "number": 44950,
-    "review_comments_count": 48,
+    "merged": false,
+    "number": 44997,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "\ud83d\udea8 [Cache] Native mamba & hybrid cache",
-    "updated_at": "2026-03-31T13:09:44Z"
+    "title": "Add sarvam model",
+    "updated_at": "2026-03-25T14:35:45Z"
   },
   {
-    "additions": 101,
-    "author": "Charly21r",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44936 This PR fixes an issue with `NotebookProgressCallback` in the `Trainer` where calling evaluate() before or after training would crash due to the training tracker being `None`. The callback now properly\u2026",
-    "changed_files": 5,
+    "additions": 286,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "- Introduce `DistributedConfig` - `DistributedConfig(tp_size=2, fsdp_size=2) # plans default to \"auto\"` replaces passing separate `tp_plan, tp_size, fsdp_plan kwargs`. Sizes auto-fill (specify one, the other defaults to 1). Plans default t\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44949",
-    "created_at": "2026-03-23T16:07:50Z",
-    "deletions": 6,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44996",
+    "created_at": "2026-03-25T14:20:25Z",
+    "deletions": 283,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44949/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44949",
+    "files_url": "https://github.com/huggingface/transformers/pull/44996/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44996",
     "labels": [],
-    "merged": false,
-    "number": 44949,
-    "review_comments_count": 12,
-    "state": "open",
-    "title": "Fix: NotebookProgressCallback crash when evaluating with the Trainer",
-    "updated_at": "2026-04-10T15:33:51Z"
+    "merged": true,
+    "number": 44996,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": " from_pretrained distributed refactor (FSDP2 + TP)",
+    "updated_at": "2026-03-26T15:32:27Z"
   },
   {
-    "additions": 1,
-    "author": "heycorgi",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "additions": 3639,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44948",
-    "created_at": "2026-03-23T15:33:56Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44994",
+    "created_at": "2026-03-25T14:02:50Z",
+    "deletions": 242,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44948/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44948",
+    "files_url": "https://github.com/huggingface/transformers/pull/44994/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44994",
     "labels": [],
     "merged": false,
-    "number": 44948,
+    "number": 44994,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Create aa.py",
-    "updated_at": "2026-03-23T15:34:35Z"
+    "title": "Add sarvam model",
+    "updated_at": "2026-03-25T14:04:38Z"
   },
   {
-    "additions": 123,
-    "author": "zucchini-nlp",
+    "additions": 583,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The doc was generated by Claude. I deleted unnecessary repetitions and fixed a few moments to be more precise. We don't really need to merge it now so if you think the text is too LLM, feel free to take this as an i\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? `make check-repo` can be quite slow, this patch adds file-level cache to speed up checks. We get up to a 27x speedup - cold cache : 46s - warm cache : 1.6s",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44947",
-    "created_at": "2026-03-23T13:23:04Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44947/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44947",
-    "labels": [],
-    "merged": true,
-    "number": 44947,
-    "review_comments_count": 16,
-    "state": "closed",
-    "title": "Add doc page for capturing outputs",
-    "updated_at": "2026-03-26T13:08:46Z"
-  },
-  {
-    "additions": 14,
-    "author": "BSchilperoort",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 13,
-    "cluster_id": "cluster-44821-7",
-    "cluster_ids": [
-      "cluster-44821-7"
-    ],
-    "cluster_role": "canonical",
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44946",
-    "created_at": "2026-03-23T12:18:34Z",
-    "deletions": 14,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44992",
+    "created_at": "2026-03-25T11:40:46Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44946/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44946",
+    "files_url": "https://github.com/huggingface/transformers/pull/44992/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44992",
     "labels": [],
     "merged": true,
-    "number": 44946,
-    "review_comments_count": 0,
+    "number": 44992,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Correct docstrings for `from_pretrained` (url input deprecated)",
-    "updated_at": "2026-03-23T13:05:16Z"
+    "title": "refactoring: speedup static checks with disk cache",
+    "updated_at": "2026-03-31T12:34:20Z"
   },
   {
-    "additions": 71,
-    "author": "zucchini-nlp",
+    "additions": 8,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? @hmellor",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? - BC for check model inputs",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44943",
-    "created_at": "2026-03-23T10:58:40Z",
-    "deletions": 9,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44990",
+    "created_at": "2026-03-25T10:26:20Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44943/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44943",
+    "files_url": "https://github.com/huggingface/transformers/pull/44990/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44990",
     "labels": [],
     "merged": true,
-    "number": 44943,
-    "review_comments_count": 1,
+    "number": 44990,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Clearer type hints and fix rope validation in configs",
-    "updated_at": "2026-03-23T13:32:11Z"
+    "title": "More small vllm fixes",
+    "updated_at": "2026-03-25T13:05:44Z"
   },
   {
-    "additions": 220,
-    "author": "hmellor",
+    "additions": 542,
+    "author": "3outeille",
     "author_association": "MEMBER",
-    "body_excerpt": null,
-    "changed_files": 3,
+    "body_excerpt": "# Distributed Training API ## Goal ```python # torchrun --nproc_per_node=4 train_fsdp_tp.py import os import torch from torch.utils.data import DataLoader from datasets import load_dataset from transformers import AutoModelForCausalLM, Aut\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44942",
-    "created_at": "2026-03-23T10:46:23Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44989",
+    "created_at": "2026-03-25T09:10:02Z",
+    "deletions": 0,
     "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44942/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44942",
+    "files_url": "https://github.com/huggingface/transformers/pull/44989/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44989",
     "labels": [],
     "merged": false,
-    "number": 44942,
+    "number": 44989,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add inference time layer fusion optimisations via `PreTrainedModel.from_pretrained(fuse_layers=True)`",
-    "updated_at": "2026-03-25T16:14:19Z"
+    "title": "\ud83d\udea8 Distributed training API",
+    "updated_at": "2026-04-13T16:44:35Z"
   },
   {
-    "additions": 4,
-    "author": "ydshieh",
+    "additions": 730,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix the failing job after #43514 (the fix is effefctive, see [here](https://github.com/huggingface/transformers/actions/runs/23433395911/job/68165255513?pr=44941)) [Update Transformers metadata](https://github.com/h\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Adds Rule 15 ``` if _tied_weights_keys is present and non-empty in modeling -> Config MUST contain the tie_word_embeddings field ```",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44941",
-    "created_at": "2026-03-23T10:42:09Z",
-    "deletions": 1,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44988",
+    "created_at": "2026-03-25T07:08:20Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44941/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44941",
+    "files_url": "https://github.com/huggingface/transformers/pull/44988/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44988",
     "labels": [],
-    "merged": true,
-    "number": 44941,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Fix failing job `Update Transformers metadata` after #43514",
-    "updated_at": "2026-03-23T13:41:39Z"
+    "merged": false,
+    "number": 44988,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "typing: rule 15 - checks for tie_word_embeddings presence",
+    "updated_at": "2026-04-14T07:07:02Z"
   },
   {
-    "additions": 138,
-    "author": "Qubitium",
+    "additions": 0,
+    "author": "Krishnachaitanyakc",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Model loading of same model path but 2 different threads (2 different instances) have meta device tensor issues: unloaded meta/empty embedding/lm-head when it should not be empty post model load. Cause: `tie_weight(\u2026",
-    "changed_files": 3,
+    "body_excerpt": "## Summary Fixes #44855 On Python 3.13, placing a `# Copied from` comment between `@torch.jit.script` and the function definition causes an `IndentationError`. This happens because `torch.jit.script` calls `inspect.getsource()` followed by\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44940",
-    "created_at": "2026-03-23T09:55:57Z",
-    "deletions": 10,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44986",
+    "created_at": "2026-03-25T03:18:31Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44940/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44940",
+    "files_url": "https://github.com/huggingface/transformers/pull/44986/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44986",
     "labels": [],
-    "merged": false,
-    "number": 44940,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Fix tie_weights skipping logic is not tied to model thread scope",
-    "updated_at": "2026-04-07T02:01:50Z"
+    "merged": true,
+    "number": 44986,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: remove Copied from comments between @torch.jit.script and def for Python 3.13 compat",
+    "updated_at": "2026-03-25T13:39:54Z"
   },
   {
-    "additions": 2038,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Refactored and cleaned up model linter - separated package - one rule per module - refactored legacy checks into their own rules - simplified pattern, duplication removal",
-    "changed_files": 25,
+    "additions": 2,
+    "author": "Krishnachaitanyakc",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #44913 When creating a `GPTNeoXConfig` (or `GPTNeoXJapaneseConfig`) with a non-default `rotary_pct`, the value is lost after a `save_pretrained` / `from_pretrained` round-trip. This happens because `convert_rope_params_to_\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44939",
-    "created_at": "2026-03-23T08:45:36Z",
-    "deletions": 1446,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44985",
+    "created_at": "2026-03-25T02:15:04Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44939/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44939",
+    "files_url": "https://github.com/huggingface/transformers/pull/44985/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44985",
     "labels": [],
     "merged": true,
-    "number": 44939,
-    "review_comments_count": 5,
+    "number": 44985,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "refactor: mlinter as its own package",
-    "updated_at": "2026-03-24T07:56:15Z"
+    "title": "fix: preserve rotary_pct across save/load cycle in GPTNeoX configs",
+    "updated_at": "2026-03-27T09:19:46Z"
   },
   {
     "additions": 2,
-    "author": "VanshikaSohal",
+    "author": "Butanium",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes two small but impactful bugs in the BART documentation: 1. **Variable shadowing bug**: In the Pipeline example, the variable was named `pipeline` which shadows the imported `pipeline` function. Renamed to `fi\u2026",
+    "body_excerpt": "## What does this PR do? `maybe_autocast` calls `torch.is_autocast_enabled(device_type)` which raises a `RuntimeError` when `device_type` is `\"meta\"`: ``` RuntimeError: unknown device type for autocast in get_autocast_dispatch_key_from_dev\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44935",
-    "created_at": "2026-03-22T18:45:01Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44984",
+    "created_at": "2026-03-25T01:39:23Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44935/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44935",
+    "files_url": "https://github.com/huggingface/transformers/pull/44984/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44984",
     "labels": [],
     "merged": true,
-    "number": 44935,
+    "number": 44984,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix variable shadowing in pipeline example and typo in BART docs (BERT \u2192 BART)",
-    "updated_at": "2026-03-23T14:28:04Z"
+    "title": "Fix `maybe_autocast` crashing on meta device tensors",
+    "updated_at": "2026-03-25T17:45:03Z"
   },
   {
-    "additions": 9,
-    "author": "Sai-Suraj-27",
+    "additions": 26,
+    "author": "Hyungkeun-Park-Nota",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes this failing [T5ModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524758706#step:14:1449) & this [Qwen2IntegrationTest](https://github.com/huggingface/transformer\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## What does this PR do? Fixes `save_pretrained()` for models loaded with `dequantize=True`. `save_pretrained` calls `reverse_op` on all weight conversion operations from loading. Dequantize ops (`Mxfp4Dequantize`, `Fp8Dequantize`, `MetalD\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44934",
-    "created_at": "2026-03-22T18:03:34Z",
-    "deletions": 7,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44983",
+    "created_at": "2026-03-25T01:19:59Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44934/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44934",
+    "files_url": "https://github.com/huggingface/transformers/pull/44983/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44983",
     "labels": [],
     "merged": true,
-    "number": 44934,
-    "review_comments_count": 0,
+    "number": 44983,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Fix failing `T5ModelIntegrationTest`",
-    "updated_at": "2026-03-24T14:50:10Z"
+    "title": "fix: add identity reverse_op to dequantize ops for save_pretrained",
+    "updated_at": "2026-03-27T17:21:18Z"
   },
   {
-    "additions": 1,
-    "author": "r266-tech",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44908 The `get_inverse_sqrt_schedule` function accepts `timescale` and `last_epoch` parameters, but `get_scheduler` was not forwarding `scheduler_specific_kwargs` to it. This caused user-provided kwargs like\u2026",
-    "changed_files": 1,
+    "additions": 108,
+    "author": "AkshajKashyap",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #43039 ## What does this PR do? When `prediction_loss_only=True` during evaluation and `use_liger_kernel=True`, `Trainer.prediction_step` now passes `skip_logits=True` to the model forward if the forward signature supports it and lab\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44932",
-    "created_at": "2026-03-22T17:30:56Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44981",
+    "created_at": "2026-03-25T00:38:02Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44932/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44932",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44981/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44981",
+    "labels": [],
     "merged": false,
-    "number": 44932,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix: Pass scheduler_specific_kwargs to inverse_sqrt scheduler",
-    "updated_at": "2026-03-23T12:44:16Z"
+    "number": 44981,
+    "review_comments_count": 6,
+    "state": "open",
+    "title": "Trainer: set skip_logits for loss-only eval when liger enabled",
+    "updated_at": "2026-04-09T15:31:50Z"
   },
   {
-    "additions": 1,
-    "author": "r266-tech",
+    "additions": 6,
+    "author": "kallewoof",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a v5 regression where `CamembertForMaskedLM` (and all CamemBERT masked-LM tasks) produces near-zero, near-uniform logits, making the model completely non-functional. ### Root cause In v5, `modeling_utils.get_\u2026",
-    "changed_files": 1,
+    "body_excerpt": "Pre-patch unnecessarily breaks merging a LoRA adapter with a model using CUDA_VISIBLE_DEVICES= e.g. when VRAM is insufficient. It also breaks non-cuda machine operations (such as merging). # What does this PR do? This PR un-breaks `CUDA_VI\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44931",
-    "created_at": "2026-03-22T17:28:57Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44980",
+    "created_at": "2026-03-24T23:50:07Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44931/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44931",
+    "files_url": "https://github.com/huggingface/transformers/pull/44980/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44980",
     "labels": [],
-    "merged": true,
-    "number": 44931,
+    "merged": false,
+    "number": 44980,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(camembert): add tie_word_embeddings=True to CamembertConfig",
-    "updated_at": "2026-03-25T07:09:37Z"
+    "title": "bug-fix: do not assume torch.cuda is available when setting up norm values, even if flash linear attention is available",
+    "updated_at": "2026-03-27T13:25:18Z"
   },
   {
-    "additions": 103,
-    "author": "javierdejesusda",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - Fixes #44912 \u2014 MXFP4 quantization error messages combine `is_triton_available()` and `is_kernels_available()` into a single `kernels_available` boolean, making it impossible to identify which dependency is missing - Split the\u2026",
+    "additions": 492,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Introduces `src/transformers/module_fusion.py`, a utility for fusing adjacent submodules in a model into a single FusedModule that executes them as a chain in one forward pass. The key components are: - `RegistryCol\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44930",
-    "created_at": "2026-03-22T17:27:20Z",
-    "deletions": 13,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44979",
+    "created_at": "2026-03-24T22:33:31Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44930/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44930",
+    "files_url": "https://github.com/huggingface/transformers/pull/44979/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44979",
     "labels": [],
-    "merged": true,
-    "number": 44930,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "fix: split MXFP4 dependency checks for specific error messages",
-    "updated_at": "2026-03-24T15:33:14Z"
+    "merged": false,
+    "number": 44979,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Module Fusion API",
+    "updated_at": "2026-04-13T19:46:19Z"
   },
   {
-    "additions": 26,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Problem In `TokenizersBackend.convert_to_native_format()`, when a tokenizer has a custom `__init__` (the `elif` branch), `tokenizer.json` was parsed **twice**: 1. `TokenizerFast.from_file(fast_tokenizer_file)` \u2014 full Rust parse includin\u2026",
+    "additions": 4,
+    "author": "cjkindel",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? `_can_set_attn_implementation` and `_can_set_experts_implementation` both do a direct subscript lookup into `sys.modules`: ```python class_module = sys.modules[cls.__module__] ``` If the module is not registered und\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44927",
-    "created_at": "2026-03-22T15:33:23Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44978",
+    "created_at": "2026-03-24T21:01:11Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44927/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44927",
-    "labels": [],
-    "merged": true,
-    "number": 44927,
-    "review_comments_count": 6,
+    "files_url": "https://github.com/huggingface/transformers/pull/44978/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44978",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44978,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: improve processor loading performance by avoiding redundant tokenizer parsing",
-    "updated_at": "2026-03-23T11:03:52Z"
+    "title": "fix: handle absent sys.modules entry in modeling_utils",
+    "updated_at": "2026-03-26T12:25:31Z"
   },
   {
-    "additions": 25,
-    "author": "yonigozlan",
+    "additions": 2,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Solve import errors when trying to import `from transformers.models.llama4.image_processing_llama4_fast import Llama4ImageProcessorFast` for example",
-    "changed_files": 2,
+    "body_excerpt": "- Adds a type hint to `ModernVBertForMaskedLM.__init__` - Removes `tie_word_embeddings` from `Qwen2VLTextConfig` (and therefore also `Qwen2_5_VLTextConfig`) because it's not valid for these models - Remove hack from `ColQwen2Config` (and t\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44926",
-    "created_at": "2026-03-22T14:46:17Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44976",
+    "created_at": "2026-03-24T19:26:33Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44926/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44926",
+    "files_url": "https://github.com/huggingface/transformers/pull/44976/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44976",
     "labels": [],
     "merged": true,
-    "number": 44926,
-    "review_comments_count": 1,
+    "number": 44976,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Fix backward compatibility for full path imports of Fast Image Processors",
-    "updated_at": "2026-03-23T14:16:49Z"
+    "title": "Fix tie_word_embedding issues with `Qwen2VL`",
+    "updated_at": "2026-03-24T20:55:15Z"
   },
   {
-    "additions": 482,
-    "author": "kashif",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds a first-class MoE routing capture/replay API for Qwen2Moe and introduces shared MoE routing helpers for reuse by other MoE model families. It adds: - a structured `MoERouting` payload in modeling output\u2026",
-    "changed_files": 7,
+    "additions": 6971,
+    "author": "philippguevorguian",
+    "author_association": "NONE",
+    "body_excerpt": null,
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44925",
-    "created_at": "2026-03-22T14:04:40Z",
-    "deletions": 24,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44975",
+    "created_at": "2026-03-24T17:12:31Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44925/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44925",
+    "files_url": "https://github.com/huggingface/transformers/pull/44975/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44975",
     "labels": [],
     "merged": false,
-    "number": 44925,
+    "number": 44975,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[MOE]  MoE routing capture and replay support",
-    "updated_at": "2026-04-13T09:04:44Z"
+    "state": "closed",
+    "title": "fix: rebase main; clean config reads, ImageProcessor backend, misc cleanup",
+    "updated_at": "2026-03-24T17:13:42Z"
   },
   {
-    "additions": 9,
-    "author": "Qubitium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix two `nogil` threading bugs (reproduced on 3.14) : 1. Continus Batching crashes with torch graph errors with 2 threads on 2 separate model instances (same model path, but two distinct instances). Cause is missing\u2026",
-    "changed_files": 1,
+    "additions": 428,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "TODO: - Saving seems to take a bit of time tho. Need investigation - Need to check if it works in 1D (FSDP or TP)and 2D (FSDP + TP). Running the script from https://github.com/huggingface/transformers/pull/44996 ``` (env_pr-44974-fsdp-core\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44924",
-    "created_at": "2026-03-22T11:46:49Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44974",
+    "created_at": "2026-03-24T16:13:25Z",
+    "deletions": 82,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44924/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44924",
+    "files_url": "https://github.com/huggingface/transformers/pull/44974/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44974",
     "labels": [],
-    "merged": true,
-    "number": 44924,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Continuous batching thread safety",
-    "updated_at": "2026-03-24T05:42:56Z"
+    "merged": false,
+    "number": 44974,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Refactor core_model_loading to support FSDP shard-on-read loading",
+    "updated_at": "2026-04-14T14:38:45Z"
   },
   {
-    "additions": 3,
-    "author": "prakhar-agarwal",
+    "additions": 22,
+    "author": "andylizf",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Addresses issue #44843. Verified with isolated repro logic. Changes made: Updated the logic to properly identify local and offline scenarios upfront. Now, is_local is correctly set to True if: 1. is_offline_mode() is active. 2. The local_f\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## What does this PR do? Adds `.item()` to `max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max()` in all vision attention modules that pass this value to `flash_attn_varlen_func`. ### Context On **released versions** (e.g. 4.52.4), using\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44923",
-    "created_at": "2026-03-22T05:20:22Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44923/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44923",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44973",
+    "created_at": "2026-03-24T15:42:32Z",
+    "deletions": 22,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44973/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44973",
     "labels": [],
     "merged": false,
-    "number": 44923,
+    "number": 44973,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix: avoid unconditional model_info call in _patch_mistral_regex",
-    "updated_at": "2026-03-22T05:24:11Z"
+    "title": "Fix max_seqlen type in vision attention for torch.compile + FA2",
+    "updated_at": "2026-03-25T14:12:50Z"
   },
   {
-    "additions": 10,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #44849. When `output_hidden_states=True` (or `output_attentions=True`) is passed to `model.generate()`, the `@capture_outputs` decorator reads the flag value but leaves it in `**kwargs`. These flags then prop\u2026",
-    "changed_files": 1,
+    "additions": 17,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title. Updating Gemma3/Gemma3n expectations.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44922",
-    "created_at": "2026-03-22T01:21:22Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44972",
+    "created_at": "2026-03-24T15:11:50Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44922/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44922",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44922,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44972/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44972",
+    "labels": [],
+    "merged": true,
+    "number": 44972,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "fix: pop output_* flags from kwargs in capture_outputs to prevent submodule leakage",
-    "updated_at": "2026-03-23T12:38:56Z"
+    "title": "[AMD CI] Gemma3/Gemma3n Expectations",
+    "updated_at": "2026-03-24T16:30:03Z"
   },
   {
-    "additions": 4,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #44918. `compute_3d_position_ids` in the Qwen2.5-VL / Qwen3-VL / Qwen3.5 model families destructures `inputs_embeds.shape` into exactly three variables: ```python batch_size, seq_length, _ = inputs_embeds.sha\u2026",
+    "additions": 0,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Removed the tokenizer_class attr was never there to begin with, and kwargs are now supported. This was failing some test on vllm ci. Fixes https://buildkite.com/vllm/ci/builds/57601/steps/canvas?sid=019d1aec-aa5a-41\u2026",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44921",
-    "created_at": "2026-03-22T00:39:01Z",
-    "deletions": 4,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44971",
+    "created_at": "2026-03-24T14:59:36Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44921/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44921",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44921,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44971/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44971",
+    "labels": [],
+    "merged": true,
+    "number": 44971,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix: use shape index access in compute_3d_position_ids for Qwen VL models",
-    "updated_at": "2026-03-23T10:00:51Z"
+    "title": "[ `vllm x v5`] nit",
+    "updated_at": "2026-03-24T17:40:05Z"
   },
   {
-    "additions": 15,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes `num_labels` not being propagated from `Qwen3_5Config` to its `text_config` when loading via `AutoConfig.from_pretrained(model, num_labels=N)`. **Root cause:** `Qwen3_5Config.__post_init__` initializes `text_\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44625-9",
-    "cluster_ids": [
-      "cluster-44625-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44920",
-    "created_at": "2026-03-22T00:01:59Z",
-    "deletions": 0,
+    "additions": 20,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44970",
+    "created_at": "2026-03-24T13:49:21Z",
+    "deletions": 76,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44920/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44920",
+    "files_url": "https://github.com/huggingface/transformers/pull/44970/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44970",
     "labels": [],
-    "merged": false,
-    "number": 44920,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44970,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix: propagate num_labels/id2label to text_config in Qwen3_5Config",
-    "updated_at": "2026-03-23T12:06:04Z"
+    "title": "Fix CPU 16 bytes alignment issue using equivalent fallback",
+    "updated_at": "2026-03-25T09:01:03Z"
   },
   {
-    "additions": 18,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes a crash in `Qwen2_5_VLProcessor.__call__` when processing batched inputs without padding (`padding=False`). **Root cause:** When the tokenizer returns sequences of different lengths (ragged list), `np.array(t\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44514-8",
-    "cluster_ids": [
-      "cluster-44514-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44919",
-    "created_at": "2026-03-21T23:57:37Z",
-    "deletions": 5,
+    "additions": 4,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Extends the CI so we can use Make and read toml files",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44968",
+    "created_at": "2026-03-24T11:43:24Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44919/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44919",
+    "files_url": "https://github.com/huggingface/transformers/pull/44968/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44968",
     "labels": [],
     "merged": false,
-    "number": 44919,
+    "number": 44968,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: handle ragged batch inputs in Qwen2_5_VLProcessor mm_token_type_ids computation",
-    "updated_at": "2026-03-23T10:38:30Z"
+    "title": "Added Make to the docker and `tomli` to `.[quality]`",
+    "updated_at": "2026-03-24T15:06:29Z"
   },
   {
-    "additions": 5,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary `GPTNeoXConfig.convert_rope_params_to_dict` unconditionally overwrote `rope_parameters[\"partial_rotary_factor\"]` with the default `0.25` when `rotary_pct` was absent from kwargs. On every `from_pretrained` call, `rotary_pct` is\u2026",
-    "changed_files": 1,
+    "additions": 87,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix: FA kernel launches currently are not thread-safe (nogil) in multi-gpu env. This simple patch fixes the issue. ```py # Set the correct CUDA context before launching the FlashAttention kernel. with torch.cuda.dev\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44917",
-    "created_at": "2026-03-21T23:34:32Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44917/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44917",
-    "labels": [
-      "Code agent slop"
-    ],
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44967",
+    "created_at": "2026-03-24T11:33:45Z",
+    "deletions": 84,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44967/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44967",
+    "labels": [],
     "merged": false,
-    "number": 44917,
+    "number": 44967,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(gpt-neox): preserve rotary_pct across save/load cycle",
-    "updated_at": "2026-03-23T12:37:48Z"
+    "title": "[WIP] Fix FA kernel launch needs correct cuda device ctx in multi-gpu env",
+    "updated_at": "2026-04-08T15:10:47Z"
   },
   {
     "additions": 8,
-    "author": "s-zx",
+    "author": "pramilajangid",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Importing `DebertaV2Model` (or anything that depends on it, e.g. `gliner`) raises `IndentationError` on Python 3.13 because `torch.jit.script` calls `inspect.getsource()`, dedents the snippet, and passes it to `ast.parse()`. Pyt\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Fixes #44964 ## Summary This PR restores backward compatibility for `CommonKwargs` in `transformers.processing_utils`, which is still referenced by some remote processor implementations. ## Problem After the typed-dict cleanup (commit `533\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44916",
-    "created_at": "2026-03-21T23:34:07Z",
-    "deletions": 4,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44966",
+    "created_at": "2026-03-24T11:06:57Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44916/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44916",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44966/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44966",
+    "labels": [],
     "merged": false,
-    "number": 44916,
+    "number": 44966,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(deberta-v2): move \"Copied from\" comments above @torch.jit.script for Python 3.13 compat",
-    "updated_at": "2026-03-23T12:34:24Z"
+    "title": "Fix backward compatibility for CommonKwargs in processing_utils (brea\u2026",
+    "updated_at": "2026-03-24T12:48:44Z"
   },
   {
-    "additions": 90,
-    "author": "maxsloef-goodfire",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? `clean_up_tokenization` applies English-specific string replacements (` .` \u2192 `.`, ` ?` \u2192 `?`, ` ,` \u2192 `,`, etc.) to decoded text. This was designed for BERT-era WordPiece tokenizers where decoding produced artifacts\u2026",
-    "changed_files": 4,
+    "additions": 37,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44915",
-    "created_at": "2026-03-21T20:45:03Z",
-    "deletions": 6,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44965",
+    "created_at": "2026-03-24T10:59:31Z",
+    "deletions": 32,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44915/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44915",
+    "files_url": "https://github.com/huggingface/transformers/pull/44965/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44965",
     "labels": [],
     "merged": false,
-    "number": 44915,
-    "review_comments_count": 1,
+    "number": 44965,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "fix: skip `clean_up_tokenization` for BPE tokenizers in `PreTrainedTokenizerFast`",
-    "updated_at": "2026-04-09T13:39:02Z"
+    "title": "try",
+    "updated_at": "2026-03-24T11:19:27Z"
   },
   {
-    "additions": 1,
-    "author": "maxsloef-goodfire",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? The `Llama3Converter` in `convert_llama_weights_to_hf.py` hardcodes `clean_up_tokenization_spaces=True` (line 468). This causes `tokenizer.decode()` to silently strip spaces before punctuation for all converted Lla\u2026",
+    "additions": 3,
+    "author": "josh-kean",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes an import in src/transformers/video_processing_utils.py that was causing the main build to fail Fixes # 44933 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44914",
-    "created_at": "2026-03-21T20:25:51Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44958",
+    "created_at": "2026-03-23T20:07:09Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44914/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44914",
+    "files_url": "https://github.com/huggingface/transformers/pull/44958/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44958",
     "labels": [],
-    "merged": true,
-    "number": 44914,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: set `clean_up_tokenization_spaces=False` in Llama 3 tokenizer conversion",
-    "updated_at": "2026-03-23T08:38:18Z"
+    "merged": false,
+    "number": 44958,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "fixed import error with PILImageResampling",
+    "updated_at": "2026-03-24T13:53:00Z"
   },
   {
-    "additions": 8,
-    "author": "ouroborosscr",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "Qwen3.5 uses 3D position_ids [3, batch, seq_len] for multi-dimensional rotary embedding. _is_packed_sequence() misinterprets this as a packed sequence, causing cu_seqlens to be constructed with 3x the actual token count. Flash attention th\u2026",
-    "changed_files": 1,
+    "additions": 1272,
+    "author": "bigshanedogg",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds native Transformers support for **[HyperCLOVA X SEED Think 14B](https://huggingface.co/naver-hyperclovax/HyperCLOVAX-SEED-Think-14B)**, a 14.74B-parameter Korean reasoning LLM developed by NAVER Cloud. - relate\u2026",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44911",
-    "created_at": "2026-03-21T15:42:57Z",
-    "deletions": 4,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44956",
+    "created_at": "2026-03-23T19:34:30Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44911/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44911",
+    "files_url": "https://github.com/huggingface/transformers/pull/44956/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44956",
     "labels": [],
     "merged": false,
-    "number": 44911,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix flash attention crash with 3D position_ids (Qwen3.5)",
-    "updated_at": "2026-03-24T14:35:57Z"
+    "number": 44956,
+    "review_comments_count": 24,
+    "state": "open",
+    "title": "Add HyperCLOVAX SEED Think 14B",
+    "updated_at": "2026-04-12T06:17:53Z"
   },
   {
-    "additions": 1,
-    "author": "anshuS1310",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "The `get_scheduler` function was identifying the `inverse_sqrt` scheduler type but failing to pass `**scheduler_specific_kwargs` to the underlying `get_inverse_sqrt_schedule` function. This caused user-defined parameters like `timescale` t\u2026",
+    "additions": 0,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "removes outdated qa pipeline reference",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44909",
-    "created_at": "2026-03-21T09:59:07Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44954",
+    "created_at": "2026-03-23T17:20:37Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44909/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44909",
+    "files_url": "https://github.com/huggingface/transformers/pull/44954/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44954",
     "labels": [],
     "merged": true,
-    "number": 44909,
+    "number": 44954,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: Update optimization.py",
-    "updated_at": "2026-03-24T13:06:15Z"
+    "title": "[docs] pipeline cleanup",
+    "updated_at": "2026-04-08T17:03:36Z"
   },
   {
-    "additions": 200,
-    "author": "syncdoth",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44906 ## Summary - Remove `.expand_as(inputs_embeds)` from placeholder mask creation in `get_placeholder_mask` and equivalent inline patterns across all VLM models. `masked_scatter` natively broadcasts `(B, S, 1)` \u2192 `(B, S, H)`, mak\u2026",
-    "changed_files": 71,
+    "additions": 861,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Decouples `kwargs` manipulation from hub's strict decorator, and ensures that all subclasses of a `PreTrainedConfig` accept any kwargs which is what we supported prev. Not all remote code has `@strict` or has an `__\u2026",
+    "changed_files": 536,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44907",
-    "created_at": "2026-03-21T06:07:35Z",
-    "deletions": 222,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44953",
+    "created_at": "2026-03-23T17:13:39Z",
+    "deletions": 824,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44907/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44907",
+    "files_url": "https://github.com/huggingface/transformers/pull/44953/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44953",
     "labels": [],
-    "merged": false,
-    "number": 44907,
+    "merged": true,
+    "number": 44953,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Remove unnecessary expand_as in get_placeholder_mask across VLMs",
-    "updated_at": "2026-03-23T12:20:03Z"
+    "state": "closed",
+    "title": "Config kwargs",
+    "updated_at": "2026-03-24T14:14:46Z"
   },
   {
-    "additions": 13,
-    "author": "NicoleRobin",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - 13 i18n README files used `./awesome-transformers.md` which resolves relative to the `i18n/` directory and leads to a 404 - Replace with the absolute GitHub URL so links work from any location - `README_ko.md` was already corr\u2026",
-    "changed_files": 13,
+    "additions": 10,
+    "author": "Jess-Co-Del",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes the non existence of output dictionary change, when parameter output_hidden_states=True is passed to models like CLIP or SigLip. This is especially pertinent for the vision model config. According to #42759 no\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44905",
-    "created_at": "2026-03-21T03:25:56Z",
-    "deletions": 13,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44952",
+    "created_at": "2026-03-23T17:02:50Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44905/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44905",
+    "files_url": "https://github.com/huggingface/transformers/pull/44952/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44952",
     "labels": [],
-    "merged": true,
-    "number": 44905,
+    "merged": false,
+    "number": 44952,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(i18n): replace broken relative links to awesome-transformers.md with absolute URLs",
-    "updated_at": "2026-03-23T12:47:56Z"
+    "state": "open",
+    "title": "Fix: Add correct return behaviour when output_hidden_states=True for CLIP and SIGLIP vision models",
+    "updated_at": "2026-03-24T11:19:35Z"
   },
   {
-    "additions": 101,
-    "author": "vivekvar-dl",
-    "author_association": "NONE",
-    "body_excerpt": "# Fix granite_speech config loading failure with int multiplier fields ## Fixes #44877 ### Problem Loading `granite_speech` configs fails with `StrictDataclassFieldValidationError` when multiplier fields (e.g., `embedding_multiplier`) are\u2026",
-    "changed_files": 3,
+    "additions": 113,
+    "author": "hemantmm",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This pull request adds routing replay functionality for mixture-of-experts (MoE) model types by giving users the option to override router probabilities while processing a forward pass through their models. <!-- Con\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44904",
-    "created_at": "2026-03-21T03:12:37Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44951",
+    "created_at": "2026-03-23T16:29:46Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44904/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44904",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44951/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44951",
+    "labels": [],
     "merged": false,
-    "number": 44904,
+    "number": 44951,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(granite_speech): convert int to float for multiplier fields in text_config",
-    "updated_at": "2026-03-23T10:37:38Z"
+    "state": "open",
+    "title": "feat: Add router_logits override to enable Routing Replay for MoE models",
+    "updated_at": "2026-03-26T12:36:20Z"
   },
   {
-    "additions": 16,
-    "author": "yonigozlan",
+    "additions": 1346,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Some remote code models are using `get_size_dict` directly, and now that size is converted to SizeDict in init, we need to support it as input in `get_size_dict`",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. This PR finally makes mamba layer caches first class citizen, and adds native support for them. It supports the following layers combinations: - all mamba layers - alternating attention layer/mamba\u2026",
+    "changed_files": 64,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44903",
-    "created_at": "2026-03-21T01:25:53Z",
-    "deletions": 7,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44950",
+    "created_at": "2026-03-23T16:25:13Z",
+    "deletions": 4113,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44903/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44903",
+    "files_url": "https://github.com/huggingface/transformers/pull/44950/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44950",
     "labels": [],
     "merged": true,
-    "number": 44903,
-    "review_comments_count": 5,
-    "state": "closed",
-    "title": "Support SizeDict import in get_size_dict",
-    "updated_at": "2026-03-23T10:28:52Z"
-  },
-  {
-    "additions": 3,
-    "author": "guoyangzhen",
-    "author_association": "NONE",
-    "body_excerpt": "## Problem `_split_tokens_on_unicode()` crashes with `IndexError: string index out of range` when the decoded token stream ends with a dangling Unicode replacement character (\\uFFFD). The computed index `unicode_offset + decoded.index(repl\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44902",
-    "created_at": "2026-03-20T22:08:49Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44902/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44902",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44902,
-    "review_comments_count": 0,
+    "number": 44950,
+    "review_comments_count": 48,
     "state": "closed",
-    "title": "fix: Whisper word timestamp OOB access on trailing replacement char",
-    "updated_at": "2026-03-23T11:59:14Z"
+    "title": "\ud83d\udea8 [Cache] Native mamba & hybrid cache",
+    "updated_at": "2026-03-31T13:09:44Z"
   },
   {
-    "additions": 19,
-    "author": "harshaljanjani",
+    "additions": 101,
+    "author": "Charly21r",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing Perceiver use case was identified and fixed in this PR: \u2192 c6d2848a23 ([\ud83d\udea8 Fix torch.jit.trace for interpolate_pos_encoding in all vision models](https://github.com/huggingface/transformers/pul\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Fixes #44936 This PR fixes an issue with `NotebookProgressCallback` in the `Trainer` where calling evaluate() before or after training would crash due to the training tracker being `None`. The callback now properly\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44899",
-    "created_at": "2026-03-20T20:02:10Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44949",
+    "created_at": "2026-03-23T16:07:50Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44899/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44899",
+    "files_url": "https://github.com/huggingface/transformers/pull/44949/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44949",
     "labels": [],
     "merged": true,
-    "number": 44899,
-    "review_comments_count": 2,
+    "number": 44949,
+    "review_comments_count": 13,
     "state": "closed",
-    "title": "fix(models): Fix Perceiver interpolate_pos_encoding interpolating to the source size",
-    "updated_at": "2026-03-25T11:54:23Z"
+    "title": "Fix: NotebookProgressCallback crash when evaluating with the Trainer",
+    "updated_at": "2026-04-13T13:47:51Z"
   },
   {
-    "additions": 14,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add compatibility with remote code importing image_processing_utils_fast modules and methods using `from transformers.image_processing_utils_fast import ...`",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "heycorgi",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44897",
-    "created_at": "2026-03-20T19:30:32Z",
-    "deletions": 5,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44948",
+    "created_at": "2026-03-23T15:33:56Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44897/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44897",
+    "files_url": "https://github.com/huggingface/transformers/pull/44948/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44948",
     "labels": [],
-    "merged": true,
-    "number": 44897,
+    "merged": false,
+    "number": 44948,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add backward compatibility for direct imports from legacy `image_processing_utils_fast`",
-    "updated_at": "2026-03-20T20:00:12Z"
+    "title": "Create aa.py",
+    "updated_at": "2026-03-23T15:34:35Z"
   },
   {
-    "additions": 354,
-    "author": "stevhliu",
+    "additions": 123,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "updates the continuous batching docs - new page for the API reference - adds sections for new features like CUDA graphs, async batching, prefix caching, logprobs (depending on when its merged) - clearer example of generation with varying l\u2026",
+    "body_excerpt": "# What does this PR do? The doc was generated by Claude. I deleted unnecessary repetitions and fixed a few moments to be more precise. We don't really need to merge it now so if you think the text is too LLM, feel free to take this as an i\u2026",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44896",
-    "created_at": "2026-03-20T19:09:41Z",
-    "deletions": 81,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44947",
+    "created_at": "2026-03-23T13:23:04Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44896/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44896",
+    "files_url": "https://github.com/huggingface/transformers/pull/44947/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44947",
     "labels": [],
     "merged": true,
-    "number": 44896,
-    "review_comments_count": 2,
+    "number": 44947,
+    "review_comments_count": 16,
     "state": "closed",
-    "title": "[docs] continuous batching",
-    "updated_at": "2026-03-30T17:17:13Z"
+    "title": "Add doc page for capturing outputs",
+    "updated_at": "2026-03-26T13:08:46Z"
   },
   {
-    "additions": 57,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR enables static FP8 experts. This also works on multi-gpu with device-map. A fix for that was to set was to set `torch.cuda.set_device()`. Triton's JIT compiler uses he active device context to determine whic\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44895",
-    "created_at": "2026-03-20T19:01:35Z",
-    "deletions": 10,
+    "additions": 14,
+    "author": "BSchilperoort",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 13,
+    "cluster_id": "cluster-44821-7",
+    "cluster_ids": [
+      "cluster-44821-7"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44946",
+    "created_at": "2026-03-23T12:18:34Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44895/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44895",
+    "files_url": "https://github.com/huggingface/transformers/pull/44946/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44946",
     "labels": [],
     "merged": true,
-    "number": 44895,
-    "review_comments_count": 4,
+    "number": 44946,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add static FP8 expert support ",
-    "updated_at": "2026-03-24T14:27:31Z"
+    "title": "Correct docstrings for `from_pretrained` (url input deprecated)",
+    "updated_at": "2026-03-23T13:05:16Z"
   },
   {
-    "additions": 10,
-    "author": "ydshieh",
+    "additions": 71,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "## Problem `ProcessorMixin.to_dict()` was calling `copy.deepcopy(self.__dict__)` on the entire processor, including the tokenizer, even though the tokenizer is always deleted from the output immediately after (since tokenizers are saved se\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? @hmellor",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44894",
-    "created_at": "2026-03-20T18:57:53Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44943",
+    "created_at": "2026-03-23T10:58:40Z",
     "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44894/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44894",
+    "files_url": "https://github.com/huggingface/transformers/pull/44943/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44943",
     "labels": [],
     "merged": true,
-    "number": 44894,
-    "review_comments_count": 0,
+    "number": 44943,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix `processing_utils.py`: avoid deepcopying tokenizer in `ProcessorMixin` to improve performance",
-    "updated_at": "2026-03-23T10:09:02Z"
+    "title": "Clearer type hints and fix rope validation in configs",
+    "updated_at": "2026-03-23T13:32:11Z"
   },
   {
-    "additions": 18,
-    "author": "ai-man-codes",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #43011 `StaticLayer` was missing a `.crop(max_length)` method, so implemented that according to the discussion of the issue. Added `StaticLayer.crop(max_length)` to match the API of StaticCache with the Dynami\u2026",
-    "changed_files": 1,
+    "additions": 220,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44893",
-    "created_at": "2026-03-20T17:48:23Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44893/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44893",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44942",
+    "created_at": "2026-03-23T10:46:23Z",
+    "deletions": 5,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44942/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44942",
     "labels": [],
     "merged": false,
-    "number": 44893,
+    "number": 44942,
     "review_comments_count": 0,
     "state": "open",
-    "title": "add `StaticLayer.crop()` to match `DynamicLayer` API",
-    "updated_at": "2026-03-28T01:41:22Z"
+    "title": "Add inference time layer fusion optimisations via `PreTrainedModel.from_pretrained(fuse_layers=True)`",
+    "updated_at": "2026-03-25T16:14:19Z"
   },
   {
-    "additions": 51,
-    "author": "he-yufeng",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44821 The `elif is_remote_url(...)` / `download_url(...)` branch in `get_image_processor_dict` was accidentally removed during the image processor refactor in #43514. This caused `AutoImageProcessor.from_pretrained(url)` to break wi\u2026",
-    "changed_files": 5,
-    "cluster_id": "cluster-44821-7",
-    "cluster_ids": [
-      "cluster-44821-7"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44892",
-    "created_at": "2026-03-20T16:21:25Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44892/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44892",
-    "labels": [],
-    "merged": false,
-    "number": 44892,
-    "review_comments_count": 4,
-    "state": "closed",
-    "title": "Fix AutoImageProcessor.from_pretrained failing on URL input",
-    "updated_at": "2026-03-24T13:30:38Z"
-  },
-  {
-    "additions": 507,
-    "author": "kashif",
+    "additions": 4,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add a MoERouterHealthCallback to log MoE router-health metrics. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the\u2026",
-    "changed_files": 7,
+    "body_excerpt": "# What does this PR do? Fix the failing job after #43514 (the fix is effefctive, see [here](https://github.com/huggingface/transformers/actions/runs/23433395911/job/68165255513?pr=44941)) [Update Transformers metadata](https://github.com/h\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44891",
-    "created_at": "2026-03-20T16:17:05Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44941",
+    "created_at": "2026-03-23T10:42:09Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44891/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44891",
-    "labels": [],
-    "merged": false,
-    "number": 44891,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[Trainer] add MoERouterHealthCallback Callback",
-    "updated_at": "2026-03-20T16:28:43Z"
-  },
-  {
-    "additions": 72,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "As discussed on Slack, this is the first phase of our approach to controlling the code agent epidemic. This PR places large warnings in both the pull request template and `CONTRIBUTING.md`, which should hopefully be seen by most contributo\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44890",
-    "created_at": "2026-03-20T16:12:45Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44890/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44890",
+    "files_url": "https://github.com/huggingface/transformers/pull/44941/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44941",
     "labels": [],
     "merged": true,
-    "number": 44890,
-    "review_comments_count": 7,
+    "number": 44941,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add big angry code agent warnings!",
-    "updated_at": "2026-03-23T11:54:48Z"
+    "title": "Fix failing job `Update Transformers metadata` after #43514",
+    "updated_at": "2026-03-23T13:41:39Z"
   },
   {
-    "additions": 86,
-    "author": "roycho96",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Calling `trainer.evaluate()` before `trainer.train()` with DeepSpeed is broken in three ways: 1. **ZeRO-3 stale state crash:** `evaluate()` creates an inference engine. `train()` starts with `accelerator.free_memor\u2026",
-    "changed_files": 2,
+    "additions": 138,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Model loading of same model path but 2 different threads (2 different instances) have meta device tensor issues: unloaded meta/empty embedding/lm-head when it should not be empty post model load. Cause: `tie_weight(\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44889",
-    "created_at": "2026-03-20T15:08:32Z",
-    "deletions": 21,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44940",
+    "created_at": "2026-03-23T09:55:57Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44889/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44889",
+    "files_url": "https://github.com/huggingface/transformers/pull/44940/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44940",
     "labels": [],
     "merged": false,
-    "number": 44889,
-    "review_comments_count": 0,
+    "number": 44940,
+    "review_comments_count": 3,
     "state": "open",
-    "title": "[DeepSpeed] Fix evaluate()/predict() before train()",
-    "updated_at": "2026-03-21T11:06:07Z"
+    "title": "Fix tie_weights skipping logic is not tied to model thread scope",
+    "updated_at": "2026-04-07T02:01:50Z"
   },
   {
-    "additions": 2,
-    "author": "Cyrilvallez",
+    "additions": 2038,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? In general, it should be much better to let the kernel do what it wants for perfs! There's no reasons to have troubles from it!",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Refactored and cleaned up model linter - separated package - one rule per module - refactored legacy checks into their own rules - simplified pattern, duplication removal",
+    "changed_files": 25,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44888",
-    "created_at": "2026-03-20T14:45:28Z",
-    "deletions": 22,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44939",
+    "created_at": "2026-03-23T08:45:36Z",
+    "deletions": 1446,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44888/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44888",
+    "files_url": "https://github.com/huggingface/transformers/pull/44939/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44939",
     "labels": [],
-    "merged": false,
-    "number": 44888,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Remove explicit cuda stream in nemotron_h",
-    "updated_at": "2026-03-26T19:15:29Z"
+    "merged": true,
+    "number": 44939,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "refactor: mlinter as its own package",
+    "updated_at": "2026-03-24T07:56:15Z"
   },
   {
     "additions": 2,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. On currently pinned version, when we run this small snippet (which is called on some model's `__init__` functions \ud83d\ude05): ```python from transformers.integrations.hub_kernels import lazy_load_kernel ca\u2026",
-    "changed_files": 2,
+    "author": "VanshikaSohal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes two small but impactful bugs in the BART documentation: 1. **Variable shadowing bug**: In the Pipeline example, the variable was named `pipeline` which shadows the imported `pipeline` function. Renamed to `fi\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44887",
-    "created_at": "2026-03-20T14:00:33Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44935",
+    "created_at": "2026-03-22T18:45:01Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44887/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44887",
+    "files_url": "https://github.com/huggingface/transformers/pull/44935/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44935",
     "labels": [],
     "merged": true,
-    "number": 44887,
-    "review_comments_count": 1,
+    "number": 44935,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Bump kernels version dependency to avoid crashes",
-    "updated_at": "2026-03-20T19:01:51Z"
+    "title": "Fix variable shadowing in pipeline example and typo in BART docs (BERT \u2192 BART)",
+    "updated_at": "2026-03-23T14:28:04Z"
   },
   {
-    "additions": 14,
-    "author": "m-matthias",
+    "additions": 9,
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Prevent crash in class LwDetrImageLoss when using it with float16 automatic mixed precision on a Cuda device. torch.pow causes an autocast to float32 when used with Cuda, which caused a type mismatch at ``` pos_weig\u2026",
+    "body_excerpt": "# What does this PR do? Fixes this failing [T5ModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524758706#step:14:1449) & this [Qwen2IntegrationTest](https://github.com/huggingface/transformer\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44886",
-    "created_at": "2026-03-20T13:56:08Z",
-    "deletions": 12,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44934",
+    "created_at": "2026-03-22T18:03:34Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44886/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44886",
+    "files_url": "https://github.com/huggingface/transformers/pull/44934/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44934",
     "labels": [],
     "merged": true,
-    "number": 44886,
-    "review_comments_count": 4,
+    "number": 44934,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "LwDetrImageLoss: Fix dtype casting to prevent crash when using amp on cuda device",
-    "updated_at": "2026-03-24T17:02:32Z"
+    "title": "Fix failing `T5ModelIntegrationTest`",
+    "updated_at": "2026-03-24T14:50:10Z"
   },
   {
-    "additions": 2,
-    "author": "guoyangzhen",
-    "author_association": "NONE",
-    "body_excerpt": "## Problem In _split_tokens_on_unicode(), when the decoded token stream ends with a dangling Unicode replacement character (U+FFFD), the computed index can equal len(decoded_full), causing IndexError: string index out of range. The failing\u2026",
+    "additions": 1,
+    "author": "r266-tech",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44908 The `get_inverse_sqrt_schedule` function accepts `timescale` and `last_epoch` parameters, but `get_scheduler` was not forwarding `scheduler_specific_kwargs` to it. This caused user-provided kwargs like\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44885",
-    "created_at": "2026-03-20T13:03:54Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44932",
+    "created_at": "2026-03-22T17:30:56Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44885/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44885",
+    "files_url": "https://github.com/huggingface/transformers/pull/44932/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44932",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44885,
+    "number": 44932,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: prevent IndexError in Whisper word timestamp decode",
-    "updated_at": "2026-03-23T12:01:50Z"
+    "title": "Fix: Pass scheduler_specific_kwargs to inverse_sqrt scheduler",
+    "updated_at": "2026-03-23T12:44:16Z"
   },
   {
-    "additions": 14,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "Some libraries that use Transformers (i.e. vLLM) use `|` on the `size` config. This PR adds `__or__` and `__ror__` so that the following works: ```console $ {\"longest_edge\": 20} | SizeDict(height=10, width=20) {'longest_edge': 20, 'height'\u2026",
+    "additions": 1,
+    "author": "r266-tech",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a v5 regression where `CamembertForMaskedLM` (and all CamemBERT masked-LM tasks) produces near-zero, near-uniform logits, making the model completely non-functional. ### Root cause In v5, `modeling_utils.get_\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44884",
-    "created_at": "2026-03-20T11:52:15Z",
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44931",
+    "created_at": "2026-03-22T17:28:57Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44884/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44884",
+    "files_url": "https://github.com/huggingface/transformers/pull/44931/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44931",
     "labels": [],
     "merged": true,
-    "number": 44884,
-    "review_comments_count": 2,
+    "number": 44931,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add missing dunder methods to `SizeDict`",
-    "updated_at": "2026-03-20T12:21:12Z"
+    "title": "fix(camembert): add tie_word_embeddings=True to CamembertConfig",
+    "updated_at": "2026-03-25T07:09:37Z"
   },
   {
-    "additions": 2,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/44589.",
-    "changed_files": 1,
+    "additions": 103,
+    "author": "javierdejesusda",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - Fixes #44912 \u2014 MXFP4 quantization error messages combine `is_triton_available()` and `is_kernels_available()` into a single `kernels_available` boolean, making it impossible to identify which dependency is missing - Split the\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44883",
-    "created_at": "2026-03-20T11:43:13Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44930",
+    "created_at": "2026-03-22T17:27:20Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44883/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44883",
+    "files_url": "https://github.com/huggingface/transformers/pull/44930/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44930",
     "labels": [],
     "merged": true,
-    "number": 44883,
-    "review_comments_count": 2,
+    "number": 44930,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Fix dtype guessing from state dict",
-    "updated_at": "2026-03-20T13:12:34Z"
+    "title": "fix: split MXFP4 dependency checks for specific error messages",
+    "updated_at": "2026-03-24T15:33:14Z"
   },
   {
-    "additions": 1,
-    "author": "itazap",
+    "additions": 26,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "fixes ```python model = \"meta-llama/Llama-4-Maverick-17B-128E-Instruct\" tok_auto = AutoTokenizer.from_pretrained(model) print(f\"AutoTokenizer: {tok_auto('hello')}\") ``` ``` The above exception was the direct cause of the following exceptio\u2026",
+    "body_excerpt": "## Problem In `TokenizersBackend.convert_to_native_format()`, when a tokenizer has a custom `__init__` (the `elif` branch), `tokenizer.json` was parsed **twice**: 1. `TokenizerFast.from_file(fast_tokenizer_file)` \u2014 full Rust parse includin\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44882",
-    "created_at": "2026-03-20T11:31:20Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44927",
+    "created_at": "2026-03-22T15:33:23Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44882/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44882",
+    "files_url": "https://github.com/huggingface/transformers/pull/44927/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44927",
     "labels": [],
-    "merged": false,
-    "number": 44882,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44927,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "fix config type",
-    "updated_at": "2026-03-20T16:34:20Z"
+    "title": "fix: improve processor loading performance by avoiding redundant tokenizer parsing",
+    "updated_at": "2026-03-23T11:03:52Z"
   },
   {
-    "additions": 142,
-    "author": "zucchini-nlp",
+    "additions": 25,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, we don't need a weird way to filter out kwargs anymore because now we don't rely on `tokenizer.apply_chat_template`. I didn't delete the unused `TypedDict` yet and will deprecate for at least 3 minor r\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? Solve import errors when trying to import `from transformers.models.llama4.image_processing_llama4_fast import Llama4ImageProcessorFast` for example",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44881",
-    "created_at": "2026-03-20T10:44:06Z",
-    "deletions": 82,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44926",
+    "created_at": "2026-03-22T14:46:17Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44881/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44881",
+    "files_url": "https://github.com/huggingface/transformers/pull/44926/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44926",
     "labels": [],
     "merged": true,
-    "number": 44881,
-    "review_comments_count": 12,
+    "number": 44926,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Allow arbitrary template kwargs in processors",
-    "updated_at": "2026-03-27T11:07:08Z"
+    "title": "Fix backward compatibility for full path imports of Fast Image Processors",
+    "updated_at": "2026-03-23T14:16:49Z"
   },
   {
-    "additions": 34,
-    "author": "itazap",
+    "additions": 96,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "incorrect model list update",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? This PR adds a first-class MoE routing capture/replay API for Qwen2Moe and introduces shared MoE routing helpers for reuse by other MoE model families. It adds: - a structured `MoERouting` payload in modeling output\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44880",
-    "created_at": "2026-03-20T10:37:13Z",
-    "deletions": 5,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44925",
+    "created_at": "2026-03-22T14:04:40Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44880/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44880",
+    "files_url": "https://github.com/huggingface/transformers/pull/44925/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44925",
     "labels": [],
     "merged": true,
-    "number": 44880,
+    "number": 44925,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "incorrect model list update",
-    "updated_at": "2026-03-24T09:27:24Z"
+    "title": "[Doc]  MoE routing capture and replay recipe ",
+    "updated_at": "2026-04-14T08:09:16Z"
   },
   {
-    "additions": 448,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - uses the Makefile as a single source of truth for running QA checks - adds `tomli` so `make` commands can read the `toml` file when needed - adds a `checkers` Python module that wraps and orchestrates all `checks`\u2026",
-    "changed_files": 7,
+    "additions": 9,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix two `nogil` threading bugs (reproduced on 3.14) : 1. Continus Batching crashes with torch graph errors with 2 threads on 2 separate model instances (same model path, but two distinct instances). Cause is missing\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44879",
-    "created_at": "2026-03-20T10:24:29Z",
-    "deletions": 90,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44924",
+    "created_at": "2026-03-22T11:46:49Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44879/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44879",
+    "files_url": "https://github.com/huggingface/transformers/pull/44924/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44924",
     "labels": [],
     "merged": true,
-    "number": 44879,
-    "review_comments_count": 6,
+    "number": 44924,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "refactor: unify QA calls",
-    "updated_at": "2026-03-25T08:51:30Z"
+    "title": "Continuous batching thread safety",
+    "updated_at": "2026-03-24T05:42:56Z"
   },
   {
-    "additions": 8,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? `check_docstrings` has been complaining for a while about those.",
+    "additions": 3,
+    "author": "prakhar-agarwal",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Addresses issue #44843. Verified with isolated repro logic. Changes made: Updated the logic to properly identify local and offline scenarios upfront. Now, is_local is correctly set to True if: 1. is_offline_mode() is active. 2. The local_f\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44878",
-    "created_at": "2026-03-20T10:01:08Z",
-    "deletions": 8,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44878/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44878",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44923",
+    "created_at": "2026-03-22T05:20:22Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44923/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44923",
     "labels": [],
-    "merged": true,
-    "number": 44878,
+    "merged": false,
+    "number": 44923,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix nemotron config docstrings",
-    "updated_at": "2026-03-20T10:11:04Z"
+    "state": "open",
+    "title": "fix: avoid unconditional model_info call in _patch_mistral_regex",
+    "updated_at": "2026-03-22T05:24:11Z"
   },
   {
-    "additions": 1,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 2,
+    "additions": 10,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #44849. When `output_hidden_states=True` (or `output_attentions=True`) is passed to `model.generate()`, the `@capture_outputs` decorator reads the flag value but leaves it in `**kwargs`. These flags then prop\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44876",
-    "created_at": "2026-03-20T09:49:54Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44922",
+    "created_at": "2026-03-22T01:21:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44876/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44876",
-    "labels": [],
-    "merged": true,
-    "number": 44876,
+    "files_url": "https://github.com/huggingface/transformers/pull/44922/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44922",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44922,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix nemotron_h modular",
-    "updated_at": "2026-03-20T10:00:35Z"
+    "title": "fix: pop output_* flags from kwargs in capture_outputs to prevent submodule leakage",
+    "updated_at": "2026-03-23T12:38:56Z"
   },
   {
-    "additions": 872,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Refactors `src/transformers/cli/serve.py` to reduce nesting depth, eliminate code duplication, and improve maintainability. No behavioral changes and the public API is unchanged. Also added a module docstring to exp\u2026",
-    "changed_files": 1,
+    "additions": 4,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #44918. `compute_3d_position_ids` in the Qwen2.5-VL / Qwen3-VL / Qwen3.5 model families destructures `inputs_embeds.shape` into exactly three variables: ```python batch_size, seq_length, _ = inputs_embeds.sha\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44875",
-    "created_at": "2026-03-20T09:06:34Z",
-    "deletions": 701,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44875/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44875",
-    "labels": [],
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44921",
+    "created_at": "2026-03-22T00:39:01Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44921/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44921",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44875,
+    "number": 44921,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor: improved the cli server module code organization",
-    "updated_at": "2026-03-23T08:08:17Z"
+    "state": "closed",
+    "title": "fix: use shape index access in compute_3d_position_ids for Qwen VL models",
+    "updated_at": "2026-03-23T10:00:51Z"
   },
   {
-    "additions": 2,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "`Llama4`'s was incorrect and causing `StrictDataclassFieldValidationErrors`. `AFMoE`'s was was fine but now it's more specific.",
+    "additions": 15,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes `num_labels` not being propagated from `Qwen3_5Config` to its `text_config` when loading via `AutoConfig.from_pretrained(model, num_labels=N)`. **Root cause:** `Qwen3_5Config.__post_init__` initializes `text_\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-44625-9",
+    "cluster_ids": [
+      "cluster-44625-9"
+    ],
+    "cluster_role": "member",
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44874",
-    "created_at": "2026-03-20T09:05:02Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44920",
+    "created_at": "2026-03-22T00:01:59Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44874/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44874",
+    "files_url": "https://github.com/huggingface/transformers/pull/44920/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44920",
     "labels": [],
-    "merged": true,
-    "number": 44874,
+    "merged": false,
+    "number": 44920,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `layer_types` type hint for `AFMoE` and `Llama4`",
-    "updated_at": "2026-03-20T12:03:58Z"
+    "title": "fix: propagate num_labels/id2label to text_config in Qwen3_5Config",
+    "updated_at": "2026-03-23T12:06:04Z"
   },
   {
-    "additions": 75,
-    "author": "sergiopaniego",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? ## Problem Online RL training (GRPO, RLOO, PPO) with all VL models using MRoPE with rope_deltas (Qwen2-VL, Qwen2.5-VL, Qwen3-VL, Qwen3.5, GLM4V, PaddleOCR-VL, Ernie4.5-VL-MoE, etc.) crashes with `RuntimeError: Sizes\u2026",
-    "changed_files": 15,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44873",
-    "created_at": "2026-03-20T08:38:03Z",
-    "deletions": 30,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44873/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44873",
+    "additions": 18,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes a crash in `Qwen2_5_VLProcessor.__call__` when processing batched inputs without padding (`padding=False`). **Root cause:** When the tokenizer returns sequences of different lengths (ragged list), `np.array(t\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44514-8",
+    "cluster_ids": [
+      "cluster-44514-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44919",
+    "created_at": "2026-03-21T23:57:37Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44919/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44919",
     "labels": [],
-    "merged": true,
-    "number": 44873,
+    "merged": false,
+    "number": 44919,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix VL model rope_deltas batch size mismatch in online RL training",
-    "updated_at": "2026-03-20T13:51:08Z"
+    "title": "fix: handle ragged batch inputs in Qwen2_5_VLProcessor mm_token_type_ids computation",
+    "updated_at": "2026-03-23T10:38:30Z"
   },
   {
-    "additions": 2,
-    "author": "IvanFan-Van",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description Update outdated comment that references non-existent file `generation_utils_samplers.py` ## Changes Detail - The comment on line 1200 states \"all samplers can be found in `generation_utils_samplers.py`\" - In reality, all sam\u2026",
+    "additions": 5,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary `GPTNeoXConfig.convert_rope_params_to_dict` unconditionally overwrote `rope_parameters[\"partial_rotary_factor\"]` with the default `0.25` when `rotary_pct` was absent from kwargs. On every `from_pretrained` call, `rotary_pct` is\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44872",
-    "created_at": "2026-03-20T05:45:46Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44917",
+    "created_at": "2026-03-21T23:34:32Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44872/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44872",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44917/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44917",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44872,
+    "number": 44917,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix: Update outdated sampler comment in generation/utils.py",
-    "updated_at": "2026-03-20T05:45:46Z"
+    "state": "closed",
+    "title": "fix(gpt-neox): preserve rotary_pct across save/load cycle",
+    "updated_at": "2026-03-23T12:37:48Z"
   },
   {
-    "additions": 666,
-    "author": "JonusClapshaw",
+    "additions": 8,
+    "author": "s-zx",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes #42200 `prediction_step` is type-hinted to return `Optional[torch.Tensor]` for logits, but when no `preprocess_logits_for_metrics` is provided it could return a tuple instead of a tensor. This caused `torch_pa\u2026",
-    "changed_files": 33,
+    "body_excerpt": "## Summary Importing `DebertaV2Model` (or anything that depends on it, e.g. `gliner`) raises `IndentationError` on Python 3.13 because `torch.jit.script` calls `inspect.getsource()`, dedents the snippet, and passes it to `ast.parse()`. Pyt\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44870",
-    "created_at": "2026-03-20T02:28:27Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44916",
+    "created_at": "2026-03-21T23:34:07Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44870/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44870",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44916/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44916",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44870,
+    "number": 44916,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: ensure prediction_step returns tensor for logits, not tuple #42200",
-    "updated_at": "2026-03-20T17:51:19Z"
+    "title": "fix(deberta-v2): move \"Copied from\" comments above @torch.jit.script for Python 3.13 compat",
+    "updated_at": "2026-03-23T12:34:24Z"
   },
   {
-    "additions": 98,
-    "author": "sdharani91",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44717 This PR fixes packed-sequence handling for the Qwen3.5 linear-attention fast path. Before this change, Qwen3.5 produced different outputs for: a padded representation of multiple sequences a packed repr\u2026",
-    "changed_files": 3,
+    "additions": 90,
+    "author": "maxsloef-goodfire",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? `clean_up_tokenization` applies English-specific string replacements (` .` \u2192 `.`, ` ?` \u2192 `?`, ` ,` \u2192 `,`, etc.) to decoded text. This was designed for BERT-era WordPiece tokenizers where decoding produced artifacts\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44867",
-    "created_at": "2026-03-19T17:31:45Z",
-    "deletions": 5,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44915",
+    "created_at": "2026-03-21T20:45:03Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44867/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44867",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44915/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44915",
+    "labels": [],
     "merged": false,
-    "number": 44867,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Pass packed boundary metadata to Qwen3.5 linear-attention fast kernels",
-    "updated_at": "2026-03-26T21:02:58Z"
+    "number": 44915,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "fix: skip `clean_up_tokenization` for BPE tokenizers in `PreTrainedTokenizerFast`",
+    "updated_at": "2026-04-09T13:39:02Z"
   },
   {
-    "additions": 78,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title",
-    "changed_files": 3,
+    "additions": 1,
+    "author": "maxsloef-goodfire",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? The `Llama3Converter` in `convert_llama_weights_to_hf.py` hardcodes `clean_up_tokenization_spaces=True` (line 468). This causes `tokenizer.decode()` to silently strip spaces before punctuation for all converted Lla\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44866",
-    "created_at": "2026-03-19T17:27:58Z",
-    "deletions": 75,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44914",
+    "created_at": "2026-03-21T20:25:51Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44866/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44866",
+    "files_url": "https://github.com/huggingface/transformers/pull/44914/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44914",
     "labels": [],
     "merged": true,
-    "number": 44866,
+    "number": 44914,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Align lfm2 cache to other mamba caches",
-    "updated_at": "2026-03-20T10:50:28Z"
+    "title": "fix: set `clean_up_tokenization_spaces=False` in Llama 3 tokenizer conversion",
+    "updated_at": "2026-03-23T08:38:18Z"
   },
   {
-    "additions": 496,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Added Rule 11 forward() must not access non-nn.Module attributes on submodules (breaks pipeline parallelism with Identity replacement). we want to make sure we just use metadata in config and elesewere when in that\u2026",
-    "changed_files": 10,
+    "additions": 8,
+    "author": "ouroborosscr",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "Qwen3.5 uses 3D position_ids [3, batch, seq_len] for multi-dimensional rotary embedding. _is_packed_sequence() misinterprets this as a packed sequence, causing cu_seqlens to be constructed with 3x the actual token count. Flash attention th\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44865",
-    "created_at": "2026-03-19T16:39:59Z",
-    "deletions": 26,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44911",
+    "created_at": "2026-03-21T15:42:57Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44865/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44865",
+    "files_url": "https://github.com/huggingface/transformers/pull/44911/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44911",
     "labels": [],
-    "merged": true,
-    "number": 44865,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 44911,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "chore(typing): added rule 11",
-    "updated_at": "2026-03-23T12:29:21Z"
+    "title": "Fix flash attention crash with 3D position_ids (Qwen3.5)",
+    "updated_at": "2026-03-24T14:35:57Z"
   },
   {
-    "additions": 99,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR switches FP8 per-tensor implementation to rely on the official torch impl `torch._scaled_mm`. Note that `torch._scaled_mm` don't explicitly support per tensor. We hack the api a bit as it only support per ro\u2026",
+    "additions": 1,
+    "author": "anshuS1310",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "The `get_scheduler` function was identifying the `inverse_sqrt` scheduler type but failing to pass `**scheduler_specific_kwargs` to the underlying `get_inverse_sqrt_schedule` function. This caused user-defined parameters like `timescale` t\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44864",
-    "created_at": "2026-03-19T16:19:53Z",
-    "deletions": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44909",
+    "created_at": "2026-03-21T09:59:07Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44864/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44864",
+    "files_url": "https://github.com/huggingface/transformers/pull/44909/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44909",
     "labels": [],
-    "merged": false,
-    "number": 44864,
+    "merged": true,
+    "number": 44909,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Switch FP8 per tensor quant to use `torch._scaled_mm`",
-    "updated_at": "2026-03-20T19:05:05Z"
+    "title": "Fix: Update optimization.py",
+    "updated_at": "2026-03-24T13:06:15Z"
   },
   {
-    "additions": 19,
-    "author": "gh-wf",
-    "author_association": "NONE",
-    "body_excerpt": "Some models (e.g. Nemotron-H) define `_tied_weights_keys` as a list, which caused `AttributeError: 'list' object has no attribute 'keys'` when calling `save_pretrained` during full finetuning. # What does this PR do? `_get_tied_weight_keys\u2026",
-    "changed_files": 2,
+    "additions": 200,
+    "author": "syncdoth",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44906 ## Summary - Remove `.expand_as(inputs_embeds)` from placeholder mask creation in `get_placeholder_mask` and equivalent inline patterns across all VLM models. `masked_scatter` natively broadcasts `(B, S, 1)` \u2192 `(B, S, H)`, mak\u2026",
+    "changed_files": 71,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44862",
-    "created_at": "2026-03-19T15:14:12Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44907",
+    "created_at": "2026-03-21T06:07:35Z",
+    "deletions": 222,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44862/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44862",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44907/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44907",
+    "labels": [],
     "merged": false,
-    "number": 44862,
+    "number": 44907,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: handle list-type _tied_weights_keys in _get_tied_weight_keys",
-    "updated_at": "2026-03-20T09:47:09Z"
+    "state": "open",
+    "title": "Remove unnecessary expand_as in get_placeholder_mask across VLMs",
+    "updated_at": "2026-03-23T12:20:03Z"
   },
   {
-    "additions": 11,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. As discussed offline with the Mistral team, the scaling applied to the query should not be the absolute one (old `cache_position`), but the actual `position_ids`, taking into account padding, packe\u2026",
-    "changed_files": 4,
+    "additions": 13,
+    "author": "NicoleRobin",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - 13 i18n README files used `./awesome-transformers.md` which resolves relative to the `i18n/` directory and leads to a 404 - Replace with the absolute GitHub URL so links work from any location - `README_ko.md` was already corr\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44860",
-    "created_at": "2026-03-19T14:27:33Z",
-    "deletions": 17,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44860/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44860",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44905",
+    "created_at": "2026-03-21T03:25:56Z",
+    "deletions": 13,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44905/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44905",
     "labels": [],
     "merged": true,
-    "number": 44860,
+    "number": 44905,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Mistral] Fix query scaling for Mistral4 and Ministral3",
-    "updated_at": "2026-03-19T18:02:06Z"
+    "title": "fix(i18n): replace broken relative links to awesome-transformers.md with absolute URLs",
+    "updated_at": "2026-03-23T12:47:56Z"
   },
   {
-    "additions": 7001,
-    "author": "philippguevorguian",
+    "additions": 101,
+    "author": "vivekvar-dl",
     "author_association": "NONE",
-    "body_excerpt": null,
-    "changed_files": 19,
+    "body_excerpt": "# Fix granite_speech config loading failure with int multiplier fields ## Fixes #44877 ### Problem Loading `granite_speech` configs fails with `StrictDataclassFieldValidationError` when multiplier fields (e.g., `embedding_multiplier`) are\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44859",
-    "created_at": "2026-03-19T13:54:19Z",
-    "deletions": 138,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44904",
+    "created_at": "2026-03-21T03:12:37Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44859/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44859",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44904/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44904",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44859,
+    "number": 44904,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor: rope in model, flatten vision, rely on qwen3 backone, misc changes",
-    "updated_at": "2026-03-19T14:08:01Z"
+    "title": "fix(granite_speech): convert int to float for multiplier fields in text_config",
+    "updated_at": "2026-03-23T10:37:38Z"
   },
   {
-    "additions": 111,
-    "author": "remi-or",
+    "additions": 16,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "Right now, the continuous batching tests all use similar mechanisms, namely: 1. loading a model and a tokenizer 2. preparing data for generate or generate_batch 3. running generate to compare its outputs with generate_batch This PR adds 3\u2026",
+    "body_excerpt": "# What does this PR do? Some remote code models are using `get_size_dict` directly, and now that size is converted to SizeDict in init, we need to support it as input in `get_size_dict`",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44858",
-    "created_at": "2026-03-19T13:22:04Z",
-    "deletions": 188,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44903",
+    "created_at": "2026-03-21T01:25:53Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44858/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44858",
+    "files_url": "https://github.com/huggingface/transformers/pull/44903/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44903",
     "labels": [],
     "merged": true,
-    "number": 44858,
-    "review_comments_count": 0,
+    "number": 44903,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "[CB] [Minor] Simplify test suite",
-    "updated_at": "2026-03-24T11:44:39Z"
+    "title": "Support SizeDict import in get_size_dict",
+    "updated_at": "2026-03-23T10:28:52Z"
   },
   {
     "additions": 3,
-    "author": "hkc5",
+    "author": "guoyangzhen",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Python 3.13's stricter parser fails when there's a comment between the `@torch.jit.script` decorator and the function definition, causing an IndentationError when importing DebertaV2Model. ## Changes - Moved comments before the\u2026",
+    "body_excerpt": "## Problem `_split_tokens_on_unicode()` crashes with `IndexError: string index out of range` when the decoded token stream ends with a dangling Unicode replacement character (\\uFFFD). The computed index `unicode_offset + decoded.index(repl\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44856",
-    "created_at": "2026-03-19T12:33:00Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44902",
+    "created_at": "2026-03-20T22:08:49Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44856/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44856",
+    "files_url": "https://github.com/huggingface/transformers/pull/44902/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44902",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44856,
+    "number": 44902,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: move comments before @torch.jit.script decorator for Python 3.13 compatibility",
-    "updated_at": "2026-03-19T13:11:44Z"
+    "title": "fix: Whisper word timestamp OOB access on trailing replacement char",
+    "updated_at": "2026-03-23T11:59:14Z"
   },
   {
-    "additions": 63,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? We had (flaky) ```bash tests/models/nemotron_h/test_modeling_nemotron_h.py::NemotronHModelTest::test_sdpa_can_compile_dynamic Fatal Python error: Segmentation fault ``` `NemotronHBlock.forward` creates a temporary `\u2026",
+    "additions": 19,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing Perceiver use case was identified and fixed in this PR: \u2192 c6d2848a23 ([\ud83d\udea8 Fix torch.jit.trace for interpolate_pos_encoding in all vision models](https://github.com/huggingface/transformers/pul\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44854",
-    "created_at": "2026-03-19T10:54:36Z",
-    "deletions": 56,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44899",
+    "created_at": "2026-03-20T20:02:10Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44854/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44854",
+    "files_url": "https://github.com/huggingface/transformers/pull/44899/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44899",
     "labels": [],
     "merged": true,
-    "number": 44854,
-    "review_comments_count": 5,
+    "number": 44899,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix core dumped when `NemotronH` is torch compiled",
-    "updated_at": "2026-03-20T14:29:16Z"
+    "title": "fix(models): Fix Perceiver interpolate_pos_encoding interpolating to the source size",
+    "updated_at": "2026-03-25T11:54:23Z"
   },
   {
-    "additions": 102,
-    "author": "sergiopaniego",
+    "additions": 14,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? `Zamba2MambaMixer.__init__` calls `lazy_load_kernel(\"mamba-ssm\")` and `lazy_load_kernel(\"causal-conv1d\")` unconditionally. Models that inherit from it (like NemotronH) and set `use_mamba_kernels=False` in their conf\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? Add compatibility with remote code importing image_processing_utils_fast modules and methods using `from transformers.image_processing_utils_fast import ...`",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44853",
-    "created_at": "2026-03-19T10:22:40Z",
-    "deletions": 72,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44897",
+    "created_at": "2026-03-20T19:30:32Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44853/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44853",
+    "files_url": "https://github.com/huggingface/transformers/pull/44897/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44897",
     "labels": [],
-    "merged": false,
-    "number": 44853,
+    "merged": true,
+    "number": 44897,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix Zamba2MambaMixer ignoring use_mamba_kernels=False",
-    "updated_at": "2026-04-10T12:18:45Z"
+    "state": "closed",
+    "title": "Add backward compatibility for direct imports from legacy `image_processing_utils_fast`",
+    "updated_at": "2026-03-20T20:00:12Z"
   },
   {
-    "additions": 117,
-    "author": "zucchini-nlp",
+    "additions": 354,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "An eos token can also be a list on most recent models, so this PR allows all `EOS` in config be a list as well. Same for q-lora-rank which apparently can be an explicit `None` for some model Also bring back `layer_type_validation` and add\u2026",
-    "changed_files": 92,
+    "body_excerpt": "updates the continuous batching docs - new page for the API reference - adds sections for new features like CUDA graphs, async batching, prefix caching, logprobs (depending on when its merged) - clearer example of generation with varying l\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44851",
-    "created_at": "2026-03-19T09:53:31Z",
-    "deletions": 101,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44896",
+    "created_at": "2026-03-20T19:09:41Z",
+    "deletions": 81,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44851/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44851",
+    "files_url": "https://github.com/huggingface/transformers/pull/44896/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44896",
     "labels": [],
     "merged": true,
-    "number": 44851,
+    "number": 44896,
     "review_comments_count": 2,
     "state": "closed",
-    "title": "Update some type hints",
-    "updated_at": "2026-03-19T16:30:32Z"
+    "title": "[docs] continuous batching",
+    "updated_at": "2026-03-30T17:17:13Z"
   },
   {
-    "additions": 5,
-    "author": "vasqu",
+    "additions": 57,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "See #44458 This is a deep issue tbh - the cross attentions are reshaped into a different shape than the text input leading to a mismatch between batch sizes. This only gets noticed during compile as it is more strict about the concrete sha\u2026",
+    "body_excerpt": "# What does this PR do? This PR enables static FP8 experts. This also works on multi-gpu with device-map. A fix for that was to set was to set `torch.cuda.set_device()`. Triton's JIT compiler uses he active device context to determine whic\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44850",
-    "created_at": "2026-03-19T08:36:18Z",
-    "deletions": 6,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44850/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44850",
-    "labels": [],
-    "merged": false,
-    "number": 44850,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[`Mllama`] Fix workaround compile",
-    "updated_at": "2026-03-26T13:02:24Z"
-  },
-  {
-    "additions": 58,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Update Qwen3_Omni_Moe, to fix these attribute errors [Qwen3OmniModelIntegrationTests](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524756897#step:14:1131) <img width=\"2292\" height=\"161\"\u2026",
-    "changed_files": 5,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 46,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44848",
-    "created_at": "2026-03-19T07:30:39Z",
-    "deletions": 38,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44895",
+    "created_at": "2026-03-20T19:01:35Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44848/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44848",
+    "files_url": "https://github.com/huggingface/transformers/pull/44895/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44895",
     "labels": [],
     "merged": true,
-    "number": 44848,
-    "review_comments_count": 5,
+    "number": 44895,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Fix few issues in Qwen_3_Omni_Moe",
-    "updated_at": "2026-03-30T16:43:00Z"
+    "title": "Add static FP8 expert support ",
+    "updated_at": "2026-03-24T14:27:31Z"
   },
   {
-    "additions": 68,
-    "author": "tarekziade",
+    "additions": 10,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Activated `anti-slop` action. Enabled checks: - `min-account-age: 30` to catch brand-new throwaway accounts, which are common in automated spam waves. - `max-daily-forks: 7` to catch accounts that fork many reposito\u2026",
+    "body_excerpt": "## Problem `ProcessorMixin.to_dict()` was calling `copy.deepcopy(self.__dict__)` on the entire processor, including the tokenizer, even though the tokenizer is always deleted from the output immediately after (since tokenizers are saved se\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44847",
-    "created_at": "2026-03-19T07:15:38Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44894",
+    "created_at": "2026-03-20T18:57:53Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44847/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44847",
+    "files_url": "https://github.com/huggingface/transformers/pull/44894/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44894",
     "labels": [],
     "merged": true,
-    "number": 44847,
-    "review_comments_count": 3,
+    "number": 44894,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "ci: add anti-slop action",
-    "updated_at": "2026-03-26T13:39:58Z"
+    "title": "fix `processing_utils.py`: avoid deepcopying tokenizer in `ProcessorMixin` to improve performance",
+    "updated_at": "2026-03-23T10:09:02Z"
   },
   {
-    "additions": 64,
-    "author": "RicardoLee510520",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? Updated the DeiT model card to follow the new standardized format: - Replaced verbose paper abstract with concise model description - Added Pipeline and AutoModel usage examples - Renamed \"Usage tips\" to \"Notes\" - U\u2026",
+    "additions": 18,
+    "author": "ai-man-codes",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #43011 `StaticLayer` was missing a `.crop(max_length)` method, so implemented that according to the discussion of the issue. Added `StaticLayer.crop(max_length)` to match the API of StaticCache with the Dynami\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44846",
-    "created_at": "2026-03-19T06:30:53Z",
-    "deletions": 90,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44846/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44846",
-    "labels": [],
-    "merged": false,
-    "number": 44846,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[Docs] Update DeiT model card to new format",
-    "updated_at": "2026-03-20T05:30:17Z"
-  },
-  {
-    "additions": 15,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes `torch.compile` failure for Mllama after #42848 introduced a new unified attention mask creation path. The root cause is a **torch inductor C++ codegen bug**: when `padding_mask_function` uses advanced tensor\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44845",
-    "created_at": "2026-03-19T06:14:54Z",
-    "deletions": 5,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44845/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44845",
-    "labels": [],
-    "merged": false,
-    "number": 44845,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Fix Mllama torch.compile failure caused by new attention mask logic",
-    "updated_at": "2026-03-26T13:01:50Z"
-  },
-  {
-    "additions": 8468,
-    "author": "sahilleth",
-    "author_association": "NONE",
-    "body_excerpt": "This PR makes a few small fixes on top of #37875 for the DEIM model: - Ensure `DeimConfig` / `DEIMConfig` and `DeimModel` / `DeimForObjectDetection` are correctly exposed from the `transformers` package. - Fix a configuration docstring lin\u2026",
-    "changed_files": 15,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44844",
-    "created_at": "2026-03-19T05:50:29Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44893",
+    "created_at": "2026-03-20T17:48:23Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44844/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44844",
+    "files_url": "https://github.com/huggingface/transformers/pull/44893/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44893",
     "labels": [],
     "merged": false,
-    "number": 44844,
+    "number": 44893,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix DEIM config export and public API",
-    "updated_at": "2026-03-19T13:18:59Z"
+    "state": "open",
+    "title": "add `StaticLayer.crop()` to match `DynamicLayer` API",
+    "updated_at": "2026-03-28T01:41:22Z"
   },
   {
-    "additions": 26,
-    "author": "omyaaa1",
-    "author_association": "NONE",
-    "body_excerpt": "Reintroduce handling for remote URLs using download_url, which was accidentally removed in recent versions. This restores support for loading image processor configs directly from URLs. Fixes #44821 # What does this PR do? <!-- Congratulat\u2026",
-    "changed_files": 1,
+    "additions": 51,
+    "author": "he-yufeng",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44821 The `elif is_remote_url(...)` / `download_url(...)` branch in `get_image_processor_dict` was accidentally removed during the image processor refactor in #43514. This caused `AutoImageProcessor.from_pretrained(url)` to break wi\u2026",
+    "changed_files": 5,
     "cluster_id": "cluster-44821-7",
     "cluster_ids": [
       "cluster-44821-7"
     ],
     "cluster_role": "member",
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44842",
-    "created_at": "2026-03-19T04:48:58Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44892",
+    "created_at": "2026-03-20T16:21:25Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44842/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44842",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44892/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44892",
+    "labels": [],
     "merged": false,
-    "number": 44842,
-    "review_comments_count": 0,
+    "number": 44892,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Fix AutoImageProcessor URL loading regression",
-    "updated_at": "2026-03-19T11:57:50Z"
+    "title": "Fix AutoImageProcessor.from_pretrained failing on URL input",
+    "updated_at": "2026-03-24T13:30:38Z"
   },
   {
-    "additions": 1,
-    "author": "zhulinchng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fix formatting of code block in weightconverter.md # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set\u2026",
-    "changed_files": 1,
+    "additions": 507,
+    "author": "kashif",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add a MoERouterHealthCallback to log MoE router-health metrics. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44839",
-    "created_at": "2026-03-19T01:40:58Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44891",
+    "created_at": "2026-03-20T16:17:05Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44839/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44839",
-    "labels": [],
-    "merged": true,
-    "number": 44839,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Correct code block formatting in weightconverter.md",
-    "updated_at": "2026-03-19T07:07:13Z"
-  },
-  {
-    "additions": 41,
-    "author": "xr843",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Fixes #44821 \u2014 `AutoImageProcessor.from_pretrained` fails with `OSError: Repo id must be in the form 'repo_name' or 'namespace/repo_name'` when given a URL - The URL handling branch (`is_remote_url` check) in `get_image_proces\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44821-7",
-    "cluster_ids": [
-      "cluster-44821-7"
-    ],
-    "cluster_role": "member",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44838",
-    "created_at": "2026-03-18T23:53:54Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44838/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44838",
+    "files_url": "https://github.com/huggingface/transformers/pull/44891/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44891",
     "labels": [],
     "merged": false,
-    "number": 44838,
+    "number": 44891,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix AutoImageProcessor.from_pretrained failing with URL input",
-    "updated_at": "2026-03-19T10:43:53Z"
+    "state": "open",
+    "title": "[Trainer] add MoERouterHealthCallback Callback",
+    "updated_at": "2026-03-20T16:28:43Z"
   },
   {
-    "additions": 482,
-    "author": "stevhliu",
+    "additions": 72,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "backfills empty model cards like gptoss and nemotronh with a `model-card.md` skill i created. its pretty minimal at the moment and just includes a brief intro and code examples. let me know if there is anything else we should add!",
-    "changed_files": 12,
+    "body_excerpt": "As discussed on Slack, this is the first phase of our approach to controlling the code agent epidemic. This PR places large warnings in both the pull request template and `CONTRIBUTING.md`, which should hopefully be seen by most contributo\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44837",
-    "created_at": "2026-03-18T21:45:31Z",
-    "deletions": 102,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44890",
+    "created_at": "2026-03-20T16:12:45Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44837/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44837",
+    "files_url": "https://github.com/huggingface/transformers/pull/44890/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44890",
     "labels": [],
     "merged": true,
-    "number": 44837,
-    "review_comments_count": 0,
+    "number": 44890,
+    "review_comments_count": 7,
     "state": "closed",
-    "title": "[docs] model cards",
-    "updated_at": "2026-03-20T22:40:41Z"
+    "title": "Add big angry code agent warnings!",
+    "updated_at": "2026-03-23T11:54:48Z"
   },
   {
-    "additions": 96,
-    "author": "tyler-romero",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Pass cu_seqlens derived from packed attention masks to FLA's ShortConvolution and chunk_gated_delta_rule kernels, preventing recurrent state from leaking across sequence boundaries during packed-sequence training. F\u2026",
+    "additions": 86,
+    "author": "roycho96",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Calling `trainer.evaluate()` before `trainer.train()` with DeepSpeed is broken in three ways: 1. **ZeRO-3 stale state crash:** `evaluate()` creates an inference engine. `train()` starts with `accelerator.free_memor\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44836",
-    "created_at": "2026-03-18T20:24:58Z",
-    "deletions": 20,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44836/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44836",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44889",
+    "created_at": "2026-03-20T15:08:32Z",
+    "deletions": 21,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44889/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44889",
     "labels": [],
     "merged": false,
-    "number": 44836,
-    "review_comments_count": 3,
+    "number": 44889,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Add cu_seqlens support to OlmoHybridGatedDeltaNet for packed sequences",
-    "updated_at": "2026-03-19T05:34:43Z"
+    "title": "[DeepSpeed] Fix evaluate()/predict() before train()",
+    "updated_at": "2026-03-21T11:06:07Z"
   },
   {
-    "additions": 187,
-    "author": "remi-or",
+    "additions": 2,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "## Summary This PR adds the `return_logprobs` flag to the continuous batching, enabling the user to retrieve the log probabilites of the tokens generated. # Tests Added a test to compare with regular generate and it passes. All tests pass.\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? In general, it should be much better to let the kernel do what it wants for perfs! There's no reasons to have troubles from it!",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44835",
-    "created_at": "2026-03-18T17:48:15Z",
-    "deletions": 83,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44888",
+    "created_at": "2026-03-20T14:45:28Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44835/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44835",
+    "files_url": "https://github.com/huggingface/transformers/pull/44888/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44888",
     "labels": [],
-    "merged": true,
-    "number": 44835,
+    "merged": false,
+    "number": 44888,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[CB] Add an option to return logprobs",
-    "updated_at": "2026-03-23T18:35:31Z"
+    "state": "open",
+    "title": "Remove explicit cuda stream in nemotron_h",
+    "updated_at": "2026-03-26T19:15:29Z"
   },
   {
-    "additions": 0,
+    "additions": 2,
     "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Oups this slipped through in https://github.com/huggingface/transformers/pull/44833",
-    "changed_files": 7,
+    "body_excerpt": "# What does this PR do? As per the title. On currently pinned version, when we run this small snippet (which is called on some model's `__init__` functions \ud83d\ude05): ```python from transformers.integrations.hub_kernels import lazy_load_kernel ca\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44834",
-    "created_at": "2026-03-18T17:07:11Z",
-    "deletions": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44887",
+    "created_at": "2026-03-20T14:00:33Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44834/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44834",
+    "files_url": "https://github.com/huggingface/transformers/pull/44887/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44887",
     "labels": [],
     "merged": true,
-    "number": 44834,
-    "review_comments_count": 0,
+    "number": 44887,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Update more modular examples",
-    "updated_at": "2026-03-18T17:18:54Z"
+    "title": "Bump kernels version dependency to avoid crashes",
+    "updated_at": "2026-03-20T19:01:51Z"
   },
   {
-    "additions": 299,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix the examples after the config change (https://github.com/huggingface/transformers/pull/41250), and re-run conversion as in general modelings changed quite a bit in the lib.",
-    "changed_files": 19,
+    "additions": 14,
+    "author": "m-matthias",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Prevent crash in class LwDetrImageLoss when using it with float16 automatic mixed precision on a Cuda device. torch.pow causes an autocast to float32 when used with Cuda, which caused a type mismatch at ``` pos_weig\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44833",
-    "created_at": "2026-03-18T16:35:34Z",
-    "deletions": 590,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44886",
+    "created_at": "2026-03-20T13:56:08Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44833/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44833",
+    "files_url": "https://github.com/huggingface/transformers/pull/44886/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44886",
     "labels": [],
     "merged": true,
-    "number": 44833,
+    "number": 44886,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "LwDetrImageLoss: Fix dtype casting to prevent crash when using amp on cuda device",
+    "updated_at": "2026-03-24T17:02:32Z"
+  },
+  {
+    "additions": 2,
+    "author": "guoyangzhen",
+    "author_association": "NONE",
+    "body_excerpt": "## Problem In _split_tokens_on_unicode(), when the decoded token stream ends with a dangling Unicode replacement character (U+FFFD), the computed index can equal len(decoded_full), causing IndexError: string index out of range. The failing\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44885",
+    "created_at": "2026-03-20T13:03:54Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44885/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44885",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44885,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix and re-run modular converter on examples",
-    "updated_at": "2026-03-18T17:00:44Z"
+    "title": "fix: prevent IndexError in Whisper word timestamp decode",
+    "updated_at": "2026-03-23T12:01:50Z"
   },
   {
-    "additions": 346,
-    "author": "IlyasMoutawwakil",
+    "additions": 14,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 6,
+    "body_excerpt": "Some libraries that use Transformers (i.e. vLLM) use `|` on the `size` config. This PR adds `__or__` and `__ror__` so that the following works: ```console $ {\"longest_edge\": 20} | SizeDict(height=10, width=20) {'longest_edge': 20, 'height'\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44832",
-    "created_at": "2026-03-18T15:33:15Z",
-    "deletions": 176,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44884",
+    "created_at": "2026-03-20T11:52:15Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44832/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44832",
+    "files_url": "https://github.com/huggingface/transformers/pull/44884/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44884",
     "labels": [],
     "merged": true,
-    "number": 44832,
-    "review_comments_count": 37,
+    "number": 44884,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "DeepGEMM",
-    "updated_at": "2026-03-31T15:04:06Z"
+    "title": "Add missing dunder methods to `SizeDict`",
+    "updated_at": "2026-03-20T12:21:12Z"
   },
   {
     "additions": 2,
-    "author": "zucchini-nlp",
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix loading in Sam3 which currently doesn't match the state dict keys from checkpoint. Adding a correct base model prefix will add it to all state dict keys, making the ckpt load-able <!-- Congratulations! You've ma\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/44589.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44831",
-    "created_at": "2026-03-18T14:50:48Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44883",
+    "created_at": "2026-03-20T11:43:13Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44831/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44831",
+    "files_url": "https://github.com/huggingface/transformers/pull/44883/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44883",
     "labels": [],
     "merged": true,
-    "number": 44831,
-    "review_comments_count": 0,
+    "number": 44883,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix loading issue in Sam3",
-    "updated_at": "2026-03-18T15:44:01Z"
+    "title": "Fix dtype guessing from state dict",
+    "updated_at": "2026-03-20T13:12:34Z"
   },
   {
-    "additions": 2103,
-    "author": "lashahub",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR adds `AudioFlamingoNext` as a separate model name that inherits directly from `MusicFlamingo` #43538 and keeps the same architecture and behavior. Changes: - add `audioflamingonext` model files - register it in the auto mappings -\u2026",
-    "changed_files": 27,
+    "additions": 1,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "fixes ```python model = \"meta-llama/Llama-4-Maverick-17B-128E-Instruct\" tok_auto = AutoTokenizer.from_pretrained(model) print(f\"AutoTokenizer: {tok_auto('hello')}\") ``` ``` The above exception was the direct cause of the following exceptio\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44830",
-    "created_at": "2026-03-18T14:31:45Z",
-    "deletions": 48,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44882",
+    "created_at": "2026-03-20T11:31:20Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44830/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44830",
+    "files_url": "https://github.com/huggingface/transformers/pull/44882/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44882",
     "labels": [],
     "merged": false,
-    "number": 44830,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Add AudioFlamingoNext model",
-    "updated_at": "2026-04-13T07:40:23Z"
+    "number": 44882,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix config type",
+    "updated_at": "2026-03-20T16:34:20Z"
   },
   {
-    "additions": 101,
-    "author": "Cyrilvallez",
+    "additions": 142,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Last batch finally! Follow up of https://github.com/huggingface/transformers/pull/44759 and many other",
-    "changed_files": 33,
+    "body_excerpt": "# What does this PR do? As per title, we don't need a weird way to filter out kwargs anymore because now we don't rely on `tokenizer.apply_chat_template`. I didn't delete the unused `TypedDict` yet and will deprecate for at least 3 minor r\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44828",
-    "created_at": "2026-03-18T13:52:32Z",
-    "deletions": 512,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44881",
+    "created_at": "2026-03-20T10:44:06Z",
+    "deletions": 82,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44828/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44828",
+    "files_url": "https://github.com/huggingface/transformers/pull/44881/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44881",
     "labels": [],
     "merged": true,
-    "number": 44828,
-    "review_comments_count": 0,
+    "number": 44881,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "Remove cache_position in more models (4 and last one)",
-    "updated_at": "2026-03-18T16:02:46Z"
+    "title": "Allow arbitrary template kwargs in processors",
+    "updated_at": "2026-03-27T11:07:08Z"
   },
   {
-    "additions": 80,
-    "author": "3outeille",
+    "additions": 34,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "https://github.com/huggingface/transformers/pull/44825",
-    "changed_files": 6,
+    "body_excerpt": "incorrect model list update",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44827",
-    "created_at": "2026-03-18T13:36:53Z",
-    "deletions": 14,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44827/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44827",
-    "labels": [],
-    "merged": false,
-    "number": 44827,
-    "review_comments_count": 6,
-    "state": "open",
-    "title": "Fix Mistral4 tests",
-    "updated_at": "2026-03-26T16:04:20Z"
-  },
-  {
-    "additions": 55,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44821 This PR fixes the issue where `AutoImageProcessor.from_pretrained()` was unable to load from a URL (e.g., `https://huggingface.co/.../raw/main/config.json`). The bug was introduced in transformers>=5.3.0. Prior versions (e.g.,\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44821-7",
-    "cluster_ids": [
-      "cluster-44821-7"
-    ],
-    "cluster_role": "member",
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44826",
-    "created_at": "2026-03-18T12:08:35Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44880",
+    "created_at": "2026-03-20T10:37:13Z",
     "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44826/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44826",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44826,
+    "files_url": "https://github.com/huggingface/transformers/pull/44880/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44880",
+    "labels": [],
+    "merged": true,
+    "number": 44880,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: allow AutoImageProcessor to load from URL",
-    "updated_at": "2026-03-19T13:28:04Z"
+    "title": "incorrect model list update",
+    "updated_at": "2026-03-24T09:27:24Z"
   },
   {
-    "additions": 1,
-    "author": "Cyrilvallez",
+    "additions": 448,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. cc @3outeille as I know you're looking into it",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? - uses the Makefile as a single source of truth for running QA checks - adds `tomli` so `make` commands can read the `toml` file when needed - adds a `checkers` Python module that wraps and orchestrates all `checks`\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44825",
-    "created_at": "2026-03-18T12:05:35Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44879",
+    "created_at": "2026-03-20T10:24:29Z",
+    "deletions": 90,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44825/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44825",
+    "files_url": "https://github.com/huggingface/transformers/pull/44879/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44879",
     "labels": [],
     "merged": true,
-    "number": 44825,
-    "review_comments_count": 0,
+    "number": 44879,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "[CI] Temporarily skip Mistral4 tests as they almost all fail",
-    "updated_at": "2026-03-18T12:15:34Z"
+    "title": "refactor: unify QA calls",
+    "updated_at": "2026-03-25T08:51:30Z"
   },
   {
-    "additions": 5,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description Fixes #44737 The `relative_positional_encoding` function in XLNet was computing all positional encodings on CPU every forward pass because the `torch.arange` calls were missing the `device` parameter. ## Changes - Added devi\u2026",
+    "additions": 8,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? `check_docstrings` has been complaining for a while about those.",
     "changed_files": 1,
-    "cluster_id": "cluster-44737-6",
-    "cluster_ids": [
-      "cluster-44737-6"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44824",
-    "created_at": "2026-03-18T11:55:01Z",
-    "deletions": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44878",
+    "created_at": "2026-03-20T10:01:08Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44824/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44824",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44824,
+    "files_url": "https://github.com/huggingface/transformers/pull/44878/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44878",
+    "labels": [],
+    "merged": true,
+    "number": 44878,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(xlnet): add device parameter to relative_positional_encoding",
-    "updated_at": "2026-03-18T13:17:38Z"
+    "title": "Fix nemotron config docstrings",
+    "updated_at": "2026-03-20T10:11:04Z"
   },
   {
-    "additions": 41,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR fixes #44821 where `AutoImageProcessor.from_pretrained()` couldn't load from a direct URL to a config file. ## Problem When passing a URL like `https://huggingface.co/jinfengxie/BFMS_1014/raw/main/config.json` to `AutoImageProcesso\u2026",
+    "additions": 1,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
     "changed_files": 2,
-    "cluster_id": "cluster-44821-7",
-    "cluster_ids": [
-      "cluster-44821-7"
-    ],
-    "cluster_role": "member",
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44823",
-    "created_at": "2026-03-18T11:54:24Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44876",
+    "created_at": "2026-03-20T09:49:54Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44823/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44823",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44823,
+    "files_url": "https://github.com/huggingface/transformers/pull/44876/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44876",
+    "labels": [],
+    "merged": true,
+    "number": 44876,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: AutoImageProcessor from URL loading",
-    "updated_at": "2026-03-18T13:17:48Z"
+    "title": "Fix nemotron_h modular",
+    "updated_at": "2026-03-20T10:00:35Z"
   },
   {
-    "additions": 4,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44737 The relative_positional_encoding method in XLNetModel was creating tensors using torch.arange() without specifying device=, causing the entire sinusoidal positional encoding computation to run on CPU every forward pass. Only t\u2026",
+    "additions": 872,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Refactors `src/transformers/cli/serve.py` to reduce nesting depth, eliminate code duplication, and improve maintainability. No behavioral changes and the public API is unchanged. Also added a module docstring to exp\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-44737-6",
-    "cluster_ids": [
-      "cluster-44737-6"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44822",
-    "created_at": "2026-03-18T11:48:28Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44822/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44822",
-    "labels": [
-      "Code agent slop"
-    ],
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44875",
+    "created_at": "2026-03-20T09:06:34Z",
+    "deletions": 701,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44875/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44875",
+    "labels": [],
     "merged": false,
-    "number": 44822,
+    "number": 44875,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: XLNet relative_positional_encoding device placement",
-    "updated_at": "2026-03-18T13:17:30Z"
+    "state": "open",
+    "title": "refactor: improved the cli server module code organization",
+    "updated_at": "2026-03-23T08:08:17Z"
   },
   {
-    "additions": 14,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44811 ## Problem When calling `processor.batch_decode(predicted_ids, skip_special_tokens=False)` with the output from `model.generate()` (without `return_dict_in_generate=True`), the `skip_special_tokens` parameter was being ignored\u2026",
-    "changed_files": 1,
+    "additions": 2,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "`Llama4`'s was incorrect and causing `StrictDataclassFieldValidationErrors`. `AFMoE`'s was was fine but now it's more specific.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44820",
-    "created_at": "2026-03-18T10:57:12Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44874",
+    "created_at": "2026-03-20T09:05:02Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44820/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44820",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44820,
+    "files_url": "https://github.com/huggingface/transformers/pull/44874/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44874",
+    "labels": [],
+    "merged": true,
+    "number": 44874,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(whisper): respect skip_special_tokens in batch_decode",
-    "updated_at": "2026-03-18T13:17:20Z"
+    "title": "Fix `layer_types` type hint for `AFMoE` and `Llama4`",
+    "updated_at": "2026-03-20T12:03:58Z"
   },
   {
-    "additions": 2,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description This PR fixes the DeepSeek tokenizer issue (#44779) where tokenization produces incorrect results in v5. ### Problem In transformers v5, the DeepSeek tokenizer (DeepSeek-R1) was producing incorrect results: - Input: \"How are\u2026",
-    "changed_files": 1,
+    "additions": 75,
+    "author": "sergiopaniego",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? ## Problem Online RL training (GRPO, RLOO, PPO) with all VL models using MRoPE with rope_deltas (Qwen2-VL, Qwen2.5-VL, Qwen3-VL, Qwen3.5, GLM4V, PaddleOCR-VL, Ernie4.5-VL-MoE, etc.) crashes with `RuntimeError: Sizes\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44819",
-    "created_at": "2026-03-18T10:55:22Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44873",
+    "created_at": "2026-03-20T08:38:03Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44819/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44819",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44819,
+    "files_url": "https://github.com/huggingface/transformers/pull/44873/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44873",
+    "labels": [],
+    "merged": true,
+    "number": 44873,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(tokenizer): add deepseek_v2 and deepseek_v3 to incorrect hub tokenizer class list",
-    "updated_at": "2026-03-18T14:11:16Z"
+    "title": "Fix VL model rope_deltas batch size mismatch in online RL training",
+    "updated_at": "2026-03-20T13:51:08Z"
   },
   {
-    "additions": 64,
-    "author": "BillionClaw",
+    "additions": 2,
+    "author": "IvanFan-Van",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description Fixes #44805 When training multimodal models (Qwen3-VL, GLM-4.6V, Qwen3-VL-MoE) with LoRA adapters, the `attention_mask` and `mm_token_type_ids` tensors can have different shapes. This causes an IndexError when the `get_rope\u2026",
-    "changed_files": 4,
+    "body_excerpt": "## Description Update outdated comment that references non-existent file `generation_utils_samplers.py` ## Changes Detail - The comment on line 1200 states \"all samplers can be found in `generation_utils_samplers.py`\" - In reality, all sam\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44818",
-    "created_at": "2026-03-18T10:46:22Z",
-    "deletions": 7,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44872",
+    "created_at": "2026-03-20T05:45:46Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44818/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44818",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44872/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44872",
+    "labels": [],
     "merged": false,
-    "number": 44818,
+    "number": 44872,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: resolve mask shape mismatch IndexError in multimodal VL models",
-    "updated_at": "2026-03-18T10:51:43Z"
+    "state": "open",
+    "title": "Fix: Update outdated sampler comment in generation/utils.py",
+    "updated_at": "2026-03-20T05:45:46Z"
   },
   {
-    "additions": 28,
-    "author": "JJJYmmm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix https://github.com/QwenLM/Qwen3.5/issues/97. This PR adds `enable_thinking` to the chat-template kwargs. With this change, `enable_thinking` is treated as a template-level argument in the tokenize=True path, so\u2026",
-    "changed_files": 2,
+    "additions": 666,
+    "author": "JonusClapshaw",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes #42200 `prediction_step` is type-hinted to return `Optional[torch.Tensor]` for logits, but when no `preprocess_logits_for_metrics` is provided it could return a tuple instead of a tensor. This caused `torch_pa\u2026",
+    "changed_files": 33,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44817",
-    "created_at": "2026-03-18T10:44:11Z",
-    "deletions": 6,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44870",
+    "created_at": "2026-03-20T02:28:27Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44817/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44817",
+    "files_url": "https://github.com/huggingface/transformers/pull/44870/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44870",
     "labels": [],
     "merged": false,
-    "number": 44817,
+    "number": 44870,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Misc] add enable_thinking to template kwargs",
-    "updated_at": "2026-03-20T14:56:04Z"
+    "title": "fix: ensure prediction_step returns tensor for logits, not tuple #42200",
+    "updated_at": "2026-03-20T17:51:19Z"
   },
   {
     "additions": 98,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. This is the last of many PR to remove the `cache_position`. At this point, all the models were already updated to not use them, and they are fully ignored in all the modelings. So this removes thei\u2026",
-    "changed_files": 57,
+    "author": "sdharani91",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #44717 This PR fixes packed-sequence handling for the Qwen3.5 linear-attention fast path. Before this change, Qwen3.5 produced different outputs for: a padded representation of multiple sequences a packed repr\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44816",
-    "created_at": "2026-03-18T10:32:04Z",
-    "deletions": 375,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44867",
+    "created_at": "2026-03-19T17:31:45Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44816/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44816",
-    "labels": [],
-    "merged": true,
-    "number": 44816,
-    "review_comments_count": 14,
-    "state": "closed",
-    "title": "[generate] Never use `cache_position` anymore in generation",
-    "updated_at": "2026-03-19T14:18:28Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/44867/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44867",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44867,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Pass packed boundary metadata to Qwen3.5 linear-attention fast kernels",
+    "updated_at": "2026-03-26T21:02:58Z"
   },
   {
-    "additions": 135,
-    "author": "ArthurZucker",
+    "additions": 78,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? As per the title",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44815",
-    "created_at": "2026-03-18T09:54:18Z",
-    "deletions": 23,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44815/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44815",
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44866",
+    "created_at": "2026-03-19T17:27:58Z",
+    "deletions": 75,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44866/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44866",
     "labels": [],
-    "merged": false,
-    "number": 44815,
-    "review_comments_count": 2,
-    "state": "open",
-    "title": "Dequant fix",
-    "updated_at": "2026-03-24T14:39:52Z"
+    "merged": true,
+    "number": 44866,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Align lfm2 cache to other mamba caches",
+    "updated_at": "2026-03-20T10:50:28Z"
   },
   {
-    "additions": 2,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44792 This PR fixes the failing test `test_model_generate_images` for the Janus model. ## Problem When generating images with the Janus model, `generation_config.num_return_sequences` and `generation_config.max_length` can be `None`\u2026",
+    "additions": 496,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Added Rule 11 forward() must not access non-nn.Module attributes on submodules (breaks pipeline parallelism with Identity replacement). we want to make sure we just use metadata in config and elesewere when in that\u2026",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44865",
+    "created_at": "2026-03-19T16:39:59Z",
+    "deletions": 26,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44865/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44865",
+    "labels": [],
+    "merged": true,
+    "number": 44865,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "chore(typing): added rule 11",
+    "updated_at": "2026-03-23T12:29:21Z"
+  },
+  {
+    "additions": 99,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR switches FP8 per-tensor implementation to rely on the official torch impl `torch._scaled_mm`. Note that `torch._scaled_mm` don't explicitly support per tensor. We hack the api a bit as it only support per ro\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44814",
-    "created_at": "2026-03-18T09:51:34Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44864",
+    "created_at": "2026-03-19T16:19:53Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44814/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44814",
+    "files_url": "https://github.com/huggingface/transformers/pull/44864/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44864",
     "labels": [],
     "merged": false,
-    "number": 44814,
+    "number": 44864,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(janus): handle None values in image generation mode",
-    "updated_at": "2026-03-18T10:42:50Z"
+    "title": "Switch FP8 per tensor quant to use `torch._scaled_mm`",
+    "updated_at": "2026-03-20T19:05:05Z"
   },
   {
-    "additions": 20,
-    "author": "hf-security-analysis[bot]",
+    "additions": 19,
+    "author": "gh-wf",
     "author_association": "NONE",
-    "body_excerpt": "Update `.github/workflows/pr-repo-consistency-bot.yml` workflow configuration. cc @ydshieh Closes huggingface/tracking-issues#26",
-    "changed_files": 1,
+    "body_excerpt": "Some models (e.g. Nemotron-H) define `_tied_weights_keys` as a list, which caused `AttributeError: 'list' object has no attribute 'keys'` when calling `save_pretrained` during full finetuning. # What does this PR do? `_get_tied_weight_keys\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44813",
-    "created_at": "2026-03-18T09:49:16Z",
-    "deletions": 46,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44862",
+    "created_at": "2026-03-19T15:14:12Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44813/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44813",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44862/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44862",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44813,
+    "number": 44862,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: update pr-repo-consistency-bot.yml",
-    "updated_at": "2026-03-18T10:08:04Z"
+    "title": "fix: handle list-type _tied_weights_keys in _get_tied_weight_keys",
+    "updated_at": "2026-03-20T09:47:09Z"
   },
   {
-    "additions": 7,
-    "author": "ydshieh",
+    "additions": 11,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Some checks (for example, modular checks) really require the installation from PR branch.",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. As discussed offline with the Mistral team, the scaling applied to the query should not be the absolute one (old `cache_position`), but the actual `position_ids`, taking into account padding, packe\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44812",
-    "created_at": "2026-03-18T09:40:36Z",
-    "deletions": 0,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44860",
+    "created_at": "2026-03-19T14:27:33Z",
+    "deletions": 17,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44812/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44812",
+    "files_url": "https://github.com/huggingface/transformers/pull/44860/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44860",
     "labels": [],
     "merged": true,
-    "number": 44812,
+    "number": 44860,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix repo-check bot",
-    "updated_at": "2026-03-18T09:49:50Z"
+    "title": "[Mistral] Fix query scaling for Mistral4 and Ministral3",
+    "updated_at": "2026-03-19T18:02:06Z"
   },
   {
-    "additions": 7,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed bare except clause in _safe_convert_tensor function to catch only Exception type.",
-    "changed_files": 2,
+    "additions": 7001,
+    "author": "philippguevorguian",
+    "author_association": "NONE",
+    "body_excerpt": null,
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44809",
-    "created_at": "2026-03-18T05:47:03Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44859",
+    "created_at": "2026-03-19T13:54:19Z",
+    "deletions": 138,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44809/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44809",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44859/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44859",
+    "labels": [],
     "merged": false,
-    "number": 44809,
+    "number": 44859,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: replace bare except with Exception in Fuyu image processing",
-    "updated_at": "2026-03-18T13:17:11Z"
+    "title": "refactor: rope in model, flatten vision, rely on qwen3 backone, misc changes",
+    "updated_at": "2026-03-19T14:08:01Z"
   },
   {
-    "additions": 2759,
-    "author": "zhang-prog",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
-    "changed_files": 31,
+    "additions": 111,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "Right now, the continuous batching tests all use similar mechanisms, namely: 1. loading a model and a tokenizer 2. preparing data for generate or generate_batch 3. running generate to compare its outputs with generate_batch This PR adds 3\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44808",
-    "created_at": "2026-03-18T04:29:07Z",
-    "deletions": 21,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44858",
+    "created_at": "2026-03-19T13:22:04Z",
+    "deletions": 188,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44808/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44808",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44858/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44858",
+    "labels": [],
     "merged": true,
-    "number": 44808,
-    "review_comments_count": 53,
+    "number": 44858,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Model] Add PP-OCRv5_server_rec and  PP-OCRv5_mobile_rec models Support",
-    "updated_at": "2026-03-18T20:24:50Z"
+    "title": "[CB] [Minor] Simplify test suite",
+    "updated_at": "2026-03-24T11:44:39Z"
   },
   {
-    "additions": 0,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "The question-answering pipeline was removed in v5.3 per the migration guide, but the Chinese, Korean, and French quicktour docs still listed it as an available pipeline task. This removes those outdated references to avoid confusing users\u2026",
-    "changed_files": 3,
+    "additions": 3,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Python 3.13's stricter parser fails when there's a comment between the `@torch.jit.script` decorator and the function definition, causing an IndentationError when importing DebertaV2Model. ## Changes - Moved comments before the\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44807",
-    "created_at": "2026-03-18T03:41:52Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44856",
+    "created_at": "2026-03-19T12:33:00Z",
     "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44807/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44807",
+    "files_url": "https://github.com/huggingface/transformers/pull/44856/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44856",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44807,
+    "number": 44856,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs(quicktour): remove question-answering pipeline from quicktour tables",
-    "updated_at": "2026-03-18T15:38:09Z"
+    "title": "fix: move comments before @torch.jit.script decorator for Python 3.13 compatibility",
+    "updated_at": "2026-03-19T13:11:44Z"
   },
   {
-    "additions": 23,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR fixes OOM errors when loading models on AMD Strix Halo APUs. ## Problem AMD Strix Halo (Radeon 8060S/8050S) uses unified memory architecture where memory-mapped file loading doesn't work well with the current amdgpu driver. This ca\u2026",
-    "changed_files": 1,
+    "additions": 63,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? We had (flaky) ```bash tests/models/nemotron_h/test_modeling_nemotron_h.py::NemotronHModelTest::test_sdpa_can_compile_dynamic Fatal Python error: Segmentation fault ``` `NemotronHBlock.forward` creates a temporary `\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44806",
-    "created_at": "2026-03-18T03:33:59Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44854",
+    "created_at": "2026-03-19T10:54:36Z",
+    "deletions": 56,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44806/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44806",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44806,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44854/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44854",
+    "labels": [],
+    "merged": true,
+    "number": 44854,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "fix(core_model_loading): disable mmap on Strix Halo to avoid OOM",
-    "updated_at": "2026-03-18T12:30:21Z"
+    "title": "Fix core dumped when `NemotronH` is torch compiled",
+    "updated_at": "2026-03-20T14:29:16Z"
   },
   {
-    "additions": 137,
-    "author": "stevhliu",
+    "additions": 102,
+    "author": "sergiopaniego",
     "author_association": "MEMBER",
-    "body_excerpt": "updates the peft docs: - a more complete training section with a full code snippet, describe saving behavior, resuming from a checkpoint, and distributed training - adds some undocumented API methods (`delete_adapter`, `active_adapters`) -\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? `Zamba2MambaMixer.__init__` calls `lazy_load_kernel(\"mamba-ssm\")` and `lazy_load_kernel(\"causal-conv1d\")` unconditionally. Models that inherit from it (like NemotronH) and set `use_mamba_kernels=False` in their conf\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44804",
-    "created_at": "2026-03-18T00:08:54Z",
-    "deletions": 89,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44853",
+    "created_at": "2026-03-19T10:22:40Z",
+    "deletions": 72,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44804/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44804",
+    "files_url": "https://github.com/huggingface/transformers/pull/44853/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44853",
     "labels": [],
-    "merged": true,
-    "number": 44804,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "[docs] peft",
-    "updated_at": "2026-03-23T17:14:58Z"
+    "merged": false,
+    "number": 44853,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix Zamba2MambaMixer ignoring use_mamba_kernels=False",
+    "updated_at": "2026-04-10T12:18:45Z"
   },
   {
-    "additions": 1341,
-    "author": "yonigozlan",
+    "additions": 117,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? (Finally) add support for checking+fixing both generated files and modular files in `check_auto_docstrings`. Also `auto_docstring` was recently added to configs, and this PR updates `check_auto_docstrings` to suppor\u2026",
-    "changed_files": 244,
+    "body_excerpt": "An eos token can also be a list on most recent models, so this PR allows all `EOS` in config be a list as well. Same for q-lora-rank which apparently can be an explicit `None` for some model Also bring back `layer_type_validation` and add\u2026",
+    "changed_files": 92,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44803",
-    "created_at": "2026-03-17T22:40:45Z",
-    "deletions": 1105,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44851",
+    "created_at": "2026-03-19T09:53:31Z",
+    "deletions": 101,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44803/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44803",
+    "files_url": "https://github.com/huggingface/transformers/pull/44851/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44851",
     "labels": [],
     "merged": true,
-    "number": 44803,
-    "review_comments_count": 24,
+    "number": 44851,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Support Modular (!!) + Configs in `check_auto_docstrings`",
-    "updated_at": "2026-03-24T17:59:12Z"
+    "title": "Update some type hints",
+    "updated_at": "2026-03-19T16:30:32Z"
   },
   {
-    "additions": 12,
-    "author": "itazap",
+    "additions": 5,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "deepseek_v2, deepseek_v3, and modernbert fix for having incorrect tokenizer class on the hub fixes: https://github.com/huggingface/transformers/issues/44779, https://github.com/huggingface/transformers/pull/44783",
-    "changed_files": 2,
+    "body_excerpt": "See #44458 This is a deep issue tbh - the cross attentions are reshaped into a different shape than the text input leading to a mismatch between batch sizes. This only gets noticed during compile as it is more strict about the concrete sha\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44801",
-    "created_at": "2026-03-17T17:40:25Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44801/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44801",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44850",
+    "created_at": "2026-03-19T08:36:18Z",
+    "deletions": 6,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44850/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44850",
     "labels": [],
-    "merged": true,
-    "number": 44801,
+    "merged": false,
+    "number": 44850,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "deepseek_v2, deepseek_v3, and modernbert fix for having incorrect tokenizer class on the hub",
-    "updated_at": "2026-03-19T13:11:54Z"
+    "title": "[`Mllama`] Fix workaround compile",
+    "updated_at": "2026-03-26T13:02:24Z"
   },
   {
-    "additions": 36,
-    "author": "aayushbaluni",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44488 `CamembertTokenizer` raised `ValueError: too many values to unpack (expected 2)` when loading models like `cjvt/sleng-bert` that provide vocab as a dict `{token: id}` from `tokenizer.json` (BPE format). The tokenize\u2026",
-    "changed_files": 2,
+    "additions": 58,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Update Qwen3_Omni_Moe, to fix these attribute errors [Qwen3OmniModelIntegrationTests](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524756897#step:14:1131) <img width=\"2292\" height=\"161\"\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44800",
-    "created_at": "2026-03-17T17:20:35Z",
-    "deletions": 2,
+    "comments_count": 46,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44848",
+    "created_at": "2026-03-19T07:30:39Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44800/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44800",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44800,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44848/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44848",
+    "labels": [],
+    "merged": true,
+    "number": 44848,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "fix: handle dict vocab in CamembertTokenizer for tokenizer.json (#44488)",
-    "updated_at": "2026-03-18T15:37:54Z"
+    "title": "Fix few issues in Qwen_3_Omni_Moe",
+    "updated_at": "2026-03-30T16:43:00Z"
   },
   {
-    "additions": 327,
-    "author": "stevhliu",
+    "additions": 68,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "updates the Hardware section of the docs for training: - combined CPU/Distributed CPU into a single doc - add more info to the Gaudi doc (mixed precision, torch.compile, distributed training) - add more info to the MPS doc (mixed precision\u2026",
-    "changed_files": 10,
+    "body_excerpt": "# What does this PR do? Activated `anti-slop` action. Enabled checks: - `min-account-age: 30` to catch brand-new throwaway accounts, which are common in automated spam waves. - `max-daily-forks: 7` to catch accounts that fork many reposito\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44799",
-    "created_at": "2026-03-17T17:19:51Z",
-    "deletions": 574,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44847",
+    "created_at": "2026-03-19T07:15:38Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44799/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44799",
+    "files_url": "https://github.com/huggingface/transformers/pull/44847/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44847",
     "labels": [],
     "merged": true,
-    "number": 44799,
-    "review_comments_count": 24,
+    "number": 44847,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "[docs] training on specific hardware",
-    "updated_at": "2026-04-13T07:12:08Z"
+    "title": "ci: add anti-slop action",
+    "updated_at": "2026-03-26T13:39:58Z"
   },
   {
-    "additions": 17,
-    "author": "divyanks",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 64,
+    "author": "RicardoLee510520",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? Updated the DeiT model card to follow the new standardized format: - Replaced verbose paper abstract with concise model description - Added Pipeline and AutoModel usage examples - Renamed \"Usage tips\" to \"Notes\" - U\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44798",
-    "created_at": "2026-03-17T16:51:46Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44846",
+    "created_at": "2026-03-19T06:30:53Z",
+    "deletions": 90,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44798/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44798",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44846/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44846",
+    "labels": [],
     "merged": false,
-    "number": 44798,
+    "number": 44846,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add MPS (Apple Silicon) example and documentation",
-    "updated_at": "2026-03-18T15:37:09Z"
+    "title": "[Docs] Update DeiT model card to new format",
+    "updated_at": "2026-03-20T05:30:17Z"
   },
   {
-    "additions": 1,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "Depends on #44887 and kernels being version `12.3` Works OOB with little changes! Example script for demonstration: ```python from transformers import AutoModelForCausalLM, AutoTokenizer fa_version = 4 #model_id = \"openai/gpt-oss-20b\" mode\u2026",
+    "additions": 15,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes `torch.compile` failure for Mllama after #42848 introduced a new unified attention mask creation path. The root cause is a **torch inductor C++ codegen bug**: when `padding_mask_function` uses advanced tensor\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44797",
-    "created_at": "2026-03-17T15:35:59Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44797/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44797",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44845",
+    "created_at": "2026-03-19T06:14:54Z",
+    "deletions": 5,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44845/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44845",
     "labels": [],
-    "merged": true,
-    "number": 44797,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[`FA4`] Add kernels fallback",
-    "updated_at": "2026-03-20T19:03:24Z"
+    "merged": false,
+    "number": 44845,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Fix Mllama torch.compile failure caused by new attention mask logic",
+    "updated_at": "2026-03-26T13:01:50Z"
   },
   {
-    "additions": 4522,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR refactors transformers serve so that it is not in a single file. We split it into multiple files with clear responsabilities. There were 2,293 lines initially in the serve.py file. ``` \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2026",
+    "additions": 8468,
+    "author": "sahilleth",
+    "author_association": "NONE",
+    "body_excerpt": "This PR makes a few small fixes on top of #37875 for the DEIM model: - Ensure `DeimConfig` / `DEIMConfig` and `DeimModel` / `DeimForObjectDetection` are correctly exposed from the `transformers` package. - Fix a configuration docstring lin\u2026",
     "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 15,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44796",
-    "created_at": "2026-03-17T13:04:06Z",
-    "deletions": 3100,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44796/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44796",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44844",
+    "created_at": "2026-03-19T05:50:29Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44844/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44844",
     "labels": [],
-    "merged": true,
-    "number": 44796,
-    "review_comments_count": 40,
+    "merged": false,
+    "number": 44844,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[refactor] Serving into proper modules",
-    "updated_at": "2026-04-01T16:03:37Z"
+    "title": "Fix DEIM config export and public API",
+    "updated_at": "2026-03-19T13:18:59Z"
   },
   {
-    "additions": 771,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds support for a more generic path, aligned with the rest of the loading! model | PR | main ----|-----|---------- \"gdax/Qwen1.5-MoE-A2.7B_gguf\"| 1min 5s |1min 18s",
-    "changed_files": 9,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44794",
-    "created_at": "2026-03-17T11:35:33Z",
-    "deletions": 400,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44794/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44794",
-    "labels": [],
+    "additions": 26,
+    "author": "omyaaa1",
+    "author_association": "NONE",
+    "body_excerpt": "Reintroduce handling for remote URLs using download_url, which was accidentally removed in recent versions. This restores support for loading image processor configs directly from URLs. Fixes #44821 # What does this PR do? <!-- Congratulat\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44821-7",
+    "cluster_ids": [
+      "cluster-44821-7"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44842",
+    "created_at": "2026-03-19T04:48:58Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44842/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44842",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44794,
+    "number": 44842,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refacto GGUF weight conversion",
-    "updated_at": "2026-03-17T17:03:08Z"
+    "state": "closed",
+    "title": "Fix AutoImageProcessor URL loading regression",
+    "updated_at": "2026-03-19T11:57:50Z"
   },
   {
-    "additions": 40,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44792 - Handles None values in Janus model's image generation mode. The `generate()` method for image generation had several places where it assumed certain config values would always be set, causing failure\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "zhulinchng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fix formatting of code block in weightconverter.md # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44793",
-    "created_at": "2026-03-17T11:29:47Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44839",
+    "created_at": "2026-03-19T01:40:58Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44793/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44793",
+    "files_url": "https://github.com/huggingface/transformers/pull/44839/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44839",
     "labels": [],
-    "merged": false,
-    "number": 44793,
-    "review_comments_count": 2,
-    "state": "open",
-    "title": "fix(janus): Handle None values in image generation mode",
-    "updated_at": "2026-03-18T10:43:24Z"
+    "merged": true,
+    "number": 44839,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Correct code block formatting in weightconverter.md",
+    "updated_at": "2026-03-19T07:07:13Z"
   },
   {
-    "additions": 20,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? AMD Strix Halo APUs (gfx1151) have a driver bug where safetensors mmap doesn't release memory properly with the unified memory architecture. This causes OOM errors when loading models that should fit in memory (e.g.\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44791",
-    "created_at": "2026-03-17T10:33:22Z",
+    "additions": 41,
+    "author": "xr843",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Fixes #44821 \u2014 `AutoImageProcessor.from_pretrained` fails with `OSError: Repo id must be in the form 'repo_name' or 'namespace/repo_name'` when given a URL - The URL handling branch (`is_remote_url` check) in `get_image_proces\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44821-7",
+    "cluster_ids": [
+      "cluster-44821-7"
+    ],
+    "cluster_role": "member",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44838",
+    "created_at": "2026-03-18T23:53:54Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44791/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44791",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44838/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44838",
+    "labels": [],
     "merged": false,
-    "number": 44791,
+    "number": 44838,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: disable mmap on Strix Halo APUs to avoid OOM",
-    "updated_at": "2026-03-18T12:33:05Z"
+    "title": "Fix AutoImageProcessor.from_pretrained failing with URL input",
+    "updated_at": "2026-03-19T10:43:53Z"
   },
   {
-    "additions": 72,
-    "author": "tarekziade",
+    "additions": 482,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This patch - adds a simple cache to the model linter so we skip files that did not change and were valid - reworks `Makefile` targets",
-    "changed_files": 6,
+    "body_excerpt": "backfills empty model cards like gptoss and nemotronh with a `model-card.md` skill i created. its pretty minimal at the moment and just includes a brief intro and code examples. let me know if there is anything else we should add!",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44790",
-    "created_at": "2026-03-17T08:54:47Z",
-    "deletions": 19,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44837",
+    "created_at": "2026-03-18T21:45:31Z",
+    "deletions": 102,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44790/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44790",
+    "files_url": "https://github.com/huggingface/transformers/pull/44837/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44837",
     "labels": [],
     "merged": true,
-    "number": 44790,
-    "review_comments_count": 1,
+    "number": 44837,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "feat: added cache to the model linter",
-    "updated_at": "2026-03-24T15:28:29Z"
+    "title": "[docs] model cards",
+    "updated_at": "2026-03-20T22:40:41Z"
   },
   {
-    "additions": 21,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Some configs from the hub have different types.",
-    "changed_files": 7,
+    "additions": 96,
+    "author": "tyler-romero",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Pass cu_seqlens derived from packed attention masks to FLA's ShortConvolution and chunk_gated_delta_rule kernels, preventing recurrent state from leaking across sequence boundaries during packed-sequence training. F\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44789",
-    "created_at": "2026-03-17T08:41:30Z",
-    "deletions": 21,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44789/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44789",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44836",
+    "created_at": "2026-03-18T20:24:58Z",
+    "deletions": 20,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44836/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44836",
     "labels": [],
-    "merged": true,
-    "number": 44789,
-    "review_comments_count": 5,
-    "state": "closed",
-    "title": "Fix config loading issues (type issues)",
-    "updated_at": "2026-03-17T09:44:50Z"
+    "merged": false,
+    "number": 44836,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Add cu_seqlens support to OlmoHybridGatedDeltaNet for packed sequences",
+    "updated_at": "2026-03-19T05:34:43Z"
   },
   {
-    "additions": 0,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "The pipeline() docstring included an example using the 'question-answering' task, but this task is not in SUPPORTED_TASKS and will raise an error when used. Remove this outdated example to avoid confusing users following the documentation.\u2026",
-    "changed_files": 1,
+    "additions": 187,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary This PR adds the `return_logprobs` flag to the continuous batching, enabling the user to retrieve the log probabilites of the tokens generated. # Tests Added a test to compare with regular generate and it passes. All tests pass.\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44788",
-    "created_at": "2026-03-17T08:38:25Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44835",
+    "created_at": "2026-03-18T17:48:15Z",
+    "deletions": 83,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44788/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44788",
+    "files_url": "https://github.com/huggingface/transformers/pull/44835/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44835",
     "labels": [],
-    "merged": false,
-    "number": 44788,
+    "merged": true,
+    "number": 44835,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs(pipelines): remove outdated question-answering example",
-    "updated_at": "2026-03-23T17:19:33Z"
+    "title": "[CB] Add an option to return logprobs",
+    "updated_at": "2026-03-23T18:35:31Z"
   },
   {
-    "additions": 4,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "The question-answering pipeline was removed in v5.0.0 per MIGRATION_GUIDE_V5.md, but the non-English task guides still referenced it. This updates the Arabic, Chinese, Japanese, and Korean question answering task guides to remove usage of\u2026",
-    "changed_files": 4,
+    "additions": 0,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Oups this slipped through in https://github.com/huggingface/transformers/pull/44833",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44787",
-    "created_at": "2026-03-17T08:24:09Z",
-    "deletions": 66,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44834",
+    "created_at": "2026-03-18T17:07:11Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44787/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44787",
+    "files_url": "https://github.com/huggingface/transformers/pull/44834/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44834",
     "labels": [],
     "merged": true,
-    "number": 44787,
+    "number": 44834,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs(tasks): remove references to removed question-answering pipeline",
-    "updated_at": "2026-03-17T16:23:50Z"
+    "title": "Update more modular examples",
+    "updated_at": "2026-03-18T17:18:54Z"
   },
   {
-    "additions": 25,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "AMD Strix Halo APUs (gfx1151) experience OOM errors when loading large models via safetensors mmap due to unified memory architecture issues. This fix detects Strix Halo GPUs by checking the GPU architecture name (gfx1151) and forces a CPU\u2026",
-    "changed_files": 1,
+    "additions": 299,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix the examples after the config change (https://github.com/huggingface/transformers/pull/41250), and re-run conversion as in general modelings changed quite a bit in the lib.",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44786",
-    "created_at": "2026-03-17T08:17:32Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44833",
+    "created_at": "2026-03-18T16:35:34Z",
+    "deletions": 590,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44786/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44786",
+    "files_url": "https://github.com/huggingface/transformers/pull/44833/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44833",
     "labels": [],
-    "merged": false,
-    "number": 44786,
+    "merged": true,
+    "number": 44833,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(core_model_loading): disable mmap on Strix Halo to avoid OOM",
-    "updated_at": "2026-03-17T10:29:44Z"
+    "title": "Fix and re-run modular converter on examples",
+    "updated_at": "2026-03-18T17:00:44Z"
   },
   {
-    "additions": 307,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "AMD Strix Halo APUs (e.g., Radeon 8060S) have issues with mmap-based tensor loading from safetensors, causing out-of-memory errors even when sufficient memory is available. This fix: - Adds `is_strix_halo()` helper to detect Strix Halo GPU\u2026",
-    "changed_files": 1,
+    "additions": 346,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44785",
-    "created_at": "2026-03-17T06:55:31Z",
-    "deletions": 83,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44832",
+    "created_at": "2026-03-18T15:33:15Z",
+    "deletions": 176,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44785/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44785",
+    "files_url": "https://github.com/huggingface/transformers/pull/44832/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44832",
     "labels": [],
-    "merged": false,
-    "number": 44785,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44832,
+    "review_comments_count": 37,
     "state": "closed",
-    "title": "fix(model_loading): Disable mmap on Strix Halo to avoid OOM",
-    "updated_at": "2026-03-17T10:28:06Z"
+    "title": "DeepGEMM",
+    "updated_at": "2026-03-31T15:04:06Z"
   },
   {
     "additions": 2,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR fixes the DeepSeek tokenizer issue where spaces were lost during decoding in Transformers v5. ## Problem DeepSeek V2 and V3 models use SentencePiece tokenization (like Llama) but were falling back to the generic TokenizersBackend i\u2026",
-    "changed_files": 1,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix loading in Sam3 which currently doesn't match the state dict keys from checkpoint. Adding a correct base model prefix will add it to all state dict keys, making the ckpt load-able <!-- Congratulations! You've ma\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44783",
-    "created_at": "2026-03-17T05:58:54Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44831",
+    "created_at": "2026-03-18T14:50:48Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44783/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44783",
+    "files_url": "https://github.com/huggingface/transformers/pull/44831/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44831",
     "labels": [],
-    "merged": false,
-    "number": 44783,
+    "merged": true,
+    "number": 44831,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix(auto): Map deepseek_v2 and deepseek_v3 to LlamaTokenizer",
-    "updated_at": "2026-03-17T11:12:52Z"
+    "state": "closed",
+    "title": "Fix loading issue in Sam3",
+    "updated_at": "2026-03-18T15:44:01Z"
   },
   {
-    "additions": 6,
-    "author": "JiwaniZakir",
+    "additions": 2103,
+    "author": "lashahub",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44737 `XLNetModel.relative_positional_encoding` was creating all `torch.arange` tensors on CPU by default, then calling `.to(output_h.device)` at the call site to move them. Adds a `device` parameter to `relative_positional_encoding\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44737-6",
-    "cluster_ids": [
-      "cluster-44737-6"
-    ],
-    "cluster_role": "canonical",
+    "body_excerpt": "This PR adds `AudioFlamingoNext` as a separate model name that inherits directly from `MusicFlamingo` #43538 and keeps the same architecture and behavior. Changes: - add `audioflamingonext` model files - register it in the auto mappings -\u2026",
+    "changed_files": 27,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44830",
+    "created_at": "2026-03-18T14:31:45Z",
+    "deletions": 48,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44830/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44830",
+    "labels": [],
+    "merged": false,
+    "number": 44830,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Add AudioFlamingoNext model",
+    "updated_at": "2026-04-13T07:40:23Z"
+  },
+  {
+    "additions": 101,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Last batch finally! Follow up of https://github.com/huggingface/transformers/pull/44759 and many other",
+    "changed_files": 33,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44782",
-    "created_at": "2026-03-17T05:11:36Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44828",
+    "created_at": "2026-03-18T13:52:32Z",
+    "deletions": 512,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44782/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44782",
+    "files_url": "https://github.com/huggingface/transformers/pull/44828/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44828",
     "labels": [],
     "merged": true,
-    "number": 44782,
+    "number": 44828,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: XLNet: relative_positional_encoding computes on CPU every forward",
-    "updated_at": "2026-03-19T13:30:48Z"
+    "title": "Remove cache_position in more models (4 and last one)",
+    "updated_at": "2026-03-18T16:02:46Z"
   },
   {
-    "additions": 5,
-    "author": "bensons",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Some model repos provide `extra_special_tokens` as a list in their tokenizer_config.json, which caused an `AttributeError: 'list' object has no attribute 'keys'`. This converts list inputs to a dict mapping each tok\u2026",
-    "changed_files": 2,
+    "additions": 80,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "https://github.com/huggingface/transformers/pull/44825",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44781",
-    "created_at": "2026-03-17T04:59:02Z",
-    "deletions": 2849,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44827",
+    "created_at": "2026-03-18T13:36:53Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44781/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44781",
+    "files_url": "https://github.com/huggingface/transformers/pull/44827/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44827",
     "labels": [],
     "merged": false,
-    "number": 44781,
-    "review_comments_count": 0,
+    "number": 44827,
+    "review_comments_count": 6,
     "state": "open",
-    "title": "Fix `_set_model_specific_special_tokens` to accept list-format `extra_special_tokens`",
-    "updated_at": "2026-03-27T23:19:21Z"
+    "title": "Fix Mistral4 tests",
+    "updated_at": "2026-03-26T16:04:20Z"
   },
   {
-    "additions": 145,
-    "author": "LincolnBurrows2017",
+    "additions": 55,
+    "author": "BillionClaw",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed logic error in is_tiktoken_available function. The original code `return with_blobfile and _is_package_available(\"blobfile\")[0] or True` would always return True due to operator precedence.",
-    "changed_files": 8,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44778",
-    "created_at": "2026-03-16T23:41:29Z",
-    "deletions": 28,
+    "body_excerpt": "Fixes #44821 This PR fixes the issue where `AutoImageProcessor.from_pretrained()` was unable to load from a URL (e.g., `https://huggingface.co/.../raw/main/config.json`). The bug was introduced in transformers>=5.3.0. Prior versions (e.g.,\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44821-7",
+    "cluster_ids": [
+      "cluster-44821-7"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44826",
+    "created_at": "2026-03-18T12:08:35Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44778/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44778",
+    "files_url": "https://github.com/huggingface/transformers/pull/44826/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44826",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44778,
+    "number": 44826,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: correct logic error in is_tiktoken_available function",
-    "updated_at": "2026-03-18T13:15:37Z"
+    "title": "fix: allow AutoImageProcessor to load from URL",
+    "updated_at": "2026-03-19T13:28:04Z"
   },
   {
-    "additions": 35,
-    "author": "stevhliu",
+    "additions": 1,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "adds docs for #43705 (enable bidirectional attention for decoder-only models)",
+    "body_excerpt": "# What does this PR do? As per the title. cc @3outeille as I know you're looking into it",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44777",
-    "created_at": "2026-03-16T21:58:40Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44825",
+    "created_at": "2026-03-18T12:05:35Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44777/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44777",
+    "files_url": "https://github.com/huggingface/transformers/pull/44825/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44825",
     "labels": [],
     "merged": true,
-    "number": 44777,
-    "review_comments_count": 1,
+    "number": 44825,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] is_causal feature",
-    "updated_at": "2026-03-17T19:50:43Z"
+    "title": "[CI] Temporarily skip Mistral4 tests as they almost all fail",
+    "updated_at": "2026-03-18T12:15:34Z"
   },
   {
-    "additions": 0,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "the doc-builder is breaking because it can't find `Mistral4ForQuestionAnswering`, which looks like it doesn't exist",
+    "additions": 5,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description Fixes #44737 The `relative_positional_encoding` function in XLNet was computing all positional encodings on CPU every forward pass because the `torch.arange` calls were missing the `device` parameter. ## Changes - Added devi\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-44737-6",
+    "cluster_ids": [
+      "cluster-44737-6"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44776",
-    "created_at": "2026-03-16T20:43:33Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44824",
+    "created_at": "2026-03-18T11:55:01Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44776/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44776",
-    "labels": [],
-    "merged": true,
-    "number": 44776,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[fix] mistral 4 docs",
-    "updated_at": "2026-03-16T21:11:29Z"
-  },
-  {
-    "additions": 177,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "refactors the current [Parallelism methods](https://huggingface.co/docs/transformers/main/en/perf_train_gpu_many#zero-data-parallelism-pipeline-parallelism-and-model-parallelism-3d-parallelism) doc to: - focus on practical examples of comb\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44775",
-    "created_at": "2026-03-16T20:23:29Z",
-    "deletions": 109,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44775/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44775",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44824/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44824",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44775,
+    "number": 44824,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[docs] n-d parallelism",
-    "updated_at": "2026-03-16T20:28:48Z"
+    "state": "closed",
+    "title": "fix(xlnet): add device parameter to relative_positional_encoding",
+    "updated_at": "2026-03-18T13:17:38Z"
   },
   {
-    "additions": 0,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Remove `is_causal` from `EuroBertConfig`",
+    "additions": 41,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR fixes #44821 where `AutoImageProcessor.from_pretrained()` couldn't load from a direct URL to a config file. ## Problem When passing a URL like `https://huggingface.co/jinfengxie/BFMS_1014/raw/main/config.json` to `AutoImageProcesso\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44774",
-    "created_at": "2026-03-16T18:56:19Z",
-    "deletions": 6,
+    "cluster_id": "cluster-44821-7",
+    "cluster_ids": [
+      "cluster-44821-7"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44823",
+    "created_at": "2026-03-18T11:54:24Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44774/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44774",
-    "labels": [],
-    "merged": true,
-    "number": 44774,
+    "files_url": "https://github.com/huggingface/transformers/pull/44823/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44823",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44823,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove `is_causal` from `EuroBertConfig`",
-    "updated_at": "2026-03-17T09:33:21Z"
+    "title": "fix: AutoImageProcessor from URL loading",
+    "updated_at": "2026-03-18T13:17:48Z"
   },
   {
-    "additions": 3,
-    "author": "githubnemo",
-    "author_association": "MEMBER",
-    "body_excerpt": "The links to the quantization offloading were outdated and 4-bit quantization also supports offloading which should be mentioned. cc @SunMarc",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44772",
-    "created_at": "2026-03-16T18:46:13Z",
-    "deletions": 3,
+    "additions": 4,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44737 The relative_positional_encoding method in XLNetModel was creating tensors using torch.arange() without specifying device=, causing the entire sinusoidal positional encoding computation to run on CPU every forward pass. Only t\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44737-6",
+    "cluster_ids": [
+      "cluster-44737-6"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44822",
+    "created_at": "2026-03-18T11:48:28Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44772/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44772",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44822/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44822",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44772,
+    "number": 44822,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "bitsandbytes: Update links and docs",
-    "updated_at": "2026-03-17T15:57:56Z"
+    "state": "closed",
+    "title": "fix: XLNet relative_positional_encoding device placement",
+    "updated_at": "2026-03-18T13:17:30Z"
   },
   {
-    "additions": 2,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? wtf",
+    "additions": 14,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44811 ## Problem When calling `processor.batch_decode(predicted_ids, skip_special_tokens=False)` with the output from `model.generate()` (without `return_dict_in_generate=True`), the `skip_special_tokens` parameter was being ignored\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44771",
-    "created_at": "2026-03-16T18:45:11Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44771/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44771",
-    "labels": [],
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44820",
+    "created_at": "2026-03-18T10:57:12Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44820/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44820",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44771,
+    "number": 44820,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "wtf",
-    "updated_at": "2026-03-16T18:56:00Z"
+    "state": "closed",
+    "title": "fix(whisper): respect skip_special_tokens in batch_decode",
+    "updated_at": "2026-03-18T13:17:20Z"
   },
   {
-    "additions": 203,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix tests failing because of `strict` type validation and decorate two missing configs, Nemotron and VibeVoice",
-    "changed_files": 12,
+    "additions": 2,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description This PR fixes the DeepSeek tokenizer issue (#44779) where tokenization produces incorrect results in v5. ### Problem In transformers v5, the DeepSeek tokenizer (DeepSeek-R1) was producing incorrect results: - Input: \"How are\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44770",
-    "created_at": "2026-03-16T18:44:03Z",
-    "deletions": 268,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44819",
+    "created_at": "2026-03-18T10:55:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44770/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44770",
-    "labels": [],
-    "merged": true,
-    "number": 44770,
-    "review_comments_count": 1,
+    "files_url": "https://github.com/huggingface/transformers/pull/44819/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44819",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44819,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix configs with `@strict`",
-    "updated_at": "2026-03-17T15:39:43Z"
+    "title": "fix(tokenizer): add deepseek_v2 and deepseek_v3 to incorrect hub tokenizer class list",
+    "updated_at": "2026-03-18T14:11:16Z"
   },
   {
-    "additions": 145,
-    "author": "LincolnBurrows2017",
+    "additions": 64,
+    "author": "BillionClaw",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary The `is_batched_video()` and `convert_pil_frames_to_video()` functions in `src/transformers/video_utils.py` were accessing `videos[0]` without first checking if the list is empty, causing `IndexError` when empty lists are passed\u2026",
-    "changed_files": 8,
+    "body_excerpt": "## Description Fixes #44805 When training multimodal models (Qwen3-VL, GLM-4.6V, Qwen3-VL-MoE) with LoRA adapters, the `attention_mask` and `mm_token_type_ids` tensors can have different shapes. This causes an IndexError when the `get_rope\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44769",
-    "created_at": "2026-03-16T18:40:07Z",
-    "deletions": 28,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44818",
+    "created_at": "2026-03-18T10:46:22Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44769/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44769",
+    "files_url": "https://github.com/huggingface/transformers/pull/44818/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44818",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44769,
+    "number": 44818,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: Handle empty lists in video_utils functions",
-    "updated_at": "2026-03-18T13:15:55Z"
+    "title": "fix: resolve mask shape mismatch IndexError in multimodal VL models",
+    "updated_at": "2026-03-18T10:51:43Z"
   },
   {
-    "additions": 20,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "The function `add_tensor_parallel_hooks_to_module` has unused parameters, in this PR we: - Remove `tp_plan`, which is not used. - Remove `parameter_name` which is not used - Remove `layer_name`. This parameter is only used for logging purp\u2026",
-    "changed_files": 1,
+    "additions": 28,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix https://github.com/QwenLM/Qwen3.5/issues/97. This PR adds `enable_thinking` to the chat-template kwargs. With this change, `enable_thinking` is treated as a template-level argument in the tokenize=True path, so\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44768",
-    "created_at": "2026-03-16T18:29:52Z",
-    "deletions": 9,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44817",
+    "created_at": "2026-03-18T10:44:11Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44768/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44768",
+    "files_url": "https://github.com/huggingface/transformers/pull/44817/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44817",
     "labels": [],
-    "merged": true,
-    "number": 44768,
-    "review_comments_count": 3,
+    "merged": false,
+    "number": 44817,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove unused parameters and improve add_tensor_parallel_hooks_t\u2026",
-    "updated_at": "2026-04-09T17:11:55Z"
+    "title": "[Misc] add enable_thinking to template kwargs",
+    "updated_at": "2026-03-20T14:56:04Z"
   },
   {
-    "additions": 11,
-    "author": "tarekziade",
+    "additions": 98,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? EuroBertConfig was missing `@strict(accept_kwargs=True)` unlike its parent LlamaConfig, causing failures when reloading saved configs that include extra keys like `architectures`. Also fixed the test helper passing\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? As per the title. This is the last of many PR to remove the `cache_position`. At this point, all the models were already updated to not use them, and they are fully ignored in all the modelings. So this removes thei\u2026",
+    "changed_files": 57,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44767",
-    "created_at": "2026-03-16T17:31:26Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44767/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44767",
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44816",
+    "created_at": "2026-03-18T10:32:04Z",
+    "deletions": 375,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44816/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44816",
     "labels": [],
     "merged": true,
-    "number": 44767,
-    "review_comments_count": 6,
+    "number": 44816,
+    "review_comments_count": 14,
     "state": "closed",
-    "title": "Fix: Eurobert model was missing @strict decorator and invalid test kwargs",
-    "updated_at": "2026-03-16T19:02:31Z"
+    "title": "[generate] Never use `cache_position` anymore in generation",
+    "updated_at": "2026-03-19T14:18:28Z"
   },
   {
-    "additions": 26,
-    "author": "itazap",
+    "additions": 135,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "for when remote code tries to import from `tokenization_xxx_fast`",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44766",
-    "created_at": "2026-03-16T17:30:23Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44766/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44766",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44815",
+    "created_at": "2026-03-18T09:54:18Z",
+    "deletions": 23,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44815/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44815",
     "labels": [],
-    "merged": true,
-    "number": 44766,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "support xxxFast alias in v5 tokenizers",
-    "updated_at": "2026-03-18T13:40:05Z"
+    "merged": false,
+    "number": 44815,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Dequant fix",
+    "updated_at": "2026-03-24T14:39:52Z"
   },
   {
-    "additions": 19,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **PaliGemma 2:** The [PaliGemma 1 test class](https://github.com/huggingface/transformers/blob/main/tests/models/paligemma/test_modeling_paligemm\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44792 This PR fixes the failing test `test_model_generate_images` for the Janus model. ## Problem When generating images with the Janus model, `generation_config.num_return_sequences` and `generation_config.max_length` can be `None`\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44765",
-    "created_at": "2026-03-16T17:26:22Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44814",
+    "created_at": "2026-03-18T09:51:34Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44765/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44765",
+    "files_url": "https://github.com/huggingface/transformers/pull/44814/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44814",
     "labels": [],
-    "merged": true,
-    "number": 44765,
+    "merged": false,
+    "number": 44814,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(testing): Fix PaliGemma 2 and PaddleOCR-VL test failures on main",
-    "updated_at": "2026-03-20T13:55:55Z"
+    "title": "fix(janus): handle None values in image generation mode",
+    "updated_at": "2026-03-18T10:42:50Z"
   },
   {
-    "additions": 12,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes the siglip import. that was also crashing the test fetcher",
-    "changed_files": 3,
+    "additions": 20,
+    "author": "hf-security-analysis[bot]",
+    "author_association": "NONE",
+    "body_excerpt": "Update `.github/workflows/pr-repo-consistency-bot.yml` workflow configuration. cc @ydshieh Closes huggingface/tracking-issues#26",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44764",
-    "created_at": "2026-03-16T17:15:40Z",
-    "deletions": 4,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44813",
+    "created_at": "2026-03-18T09:49:16Z",
+    "deletions": 46,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44764/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44764",
+    "files_url": "https://github.com/huggingface/transformers/pull/44813/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44813",
     "labels": [],
-    "merged": true,
-    "number": 44764,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 44813,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: sig lip import",
-    "updated_at": "2026-03-16T17:38:41Z"
+    "title": "chore: update pr-repo-consistency-bot.yml",
+    "updated_at": "2026-03-18T10:08:04Z"
   },
   {
-    "additions": 17,
-    "author": "xenova",
+    "additions": 7,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds support for MLP mixers, used by [nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16). Previously, it would crash because it would not recognize the `-` char in t\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Some checks (for example, modular checks) really require the installation from PR branch.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44763",
-    "created_at": "2026-03-16T17:04:36Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44812",
+    "created_at": "2026-03-18T09:40:36Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44763/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44763",
+    "files_url": "https://github.com/huggingface/transformers/pull/44812/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44812",
     "labels": [],
-    "merged": false,
-    "number": 44763,
-    "review_comments_count": 1,
+    "merged": true,
+    "number": 44812,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[nemotron_h] Add support for MLP mixers",
-    "updated_at": "2026-04-09T02:15:49Z"
+    "title": "Fix repo-check bot",
+    "updated_at": "2026-03-18T09:49:50Z"
   },
   {
-    "additions": 4,
-    "author": "BillionClaw",
+    "additions": 7,
+    "author": "LincolnBurrows2017",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "XLNet.relative_positional_encoding creates intermediate tensors on CPU every forward pass because torch.arange was missing the device parameter. This causes unnecessary CPU-GPU transfers when running on CUDA. Added device=self.device to al\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44737-6",
-    "cluster_ids": [
-      "cluster-44737-6"
-    ],
-    "cluster_role": "member",
+    "body_excerpt": "Fixed bare except clause in _safe_convert_tensor function to catch only Exception type.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44762",
-    "created_at": "2026-03-16T16:17:54Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44809",
+    "created_at": "2026-03-18T05:47:03Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44762/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44762",
+    "files_url": "https://github.com/huggingface/transformers/pull/44809/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44809",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44762,
+    "number": 44809,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: Cache XLNet relative_positional_encoding to avoid CPU computation",
-    "updated_at": "2026-03-18T15:16:14Z"
+    "title": "fix: replace bare except with Exception in Fuyu image processing",
+    "updated_at": "2026-03-18T13:17:11Z"
   },
   {
-    "additions": 152,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This adds rule 10: ``` Direct config definitions must use @strict(accept_kwargs=True). ```",
-    "changed_files": 3,
+    "additions": 2759,
+    "author": "zhang-prog",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 31,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44761",
-    "created_at": "2026-03-16T16:05:03Z",
-    "deletions": 7,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44808",
+    "created_at": "2026-03-18T04:29:07Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44761/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44761",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44808/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44808",
+    "labels": [
+      "New model"
+    ],
     "merged": true,
-    "number": 44761,
-    "review_comments_count": 7,
+    "number": 44808,
+    "review_comments_count": 53,
     "state": "closed",
-    "title": "model-linter: Added rule 10",
-    "updated_at": "2026-03-17T08:52:19Z"
+    "title": "[Model] Add PP-OCRv5_server_rec and  PP-OCRv5_mobile_rec models Support",
+    "updated_at": "2026-03-18T20:24:50Z"
   },
   {
-    "additions": 2090,
-    "author": "juliendenize",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 15,
+    "additions": 0,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "The question-answering pipeline was removed in v5.3 per the migration guide, but the Chinese, Korean, and French quicktour docs still listed it as an available pipeline task. This removes those outdated references to avoid confusing users\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44760",
-    "created_at": "2026-03-16T15:54:11Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44807",
+    "created_at": "2026-03-18T03:41:52Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44760/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44760",
+    "files_url": "https://github.com/huggingface/transformers/pull/44807/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44807",
     "labels": [
-      "New model"
+      "Code agent slop"
     ],
-    "merged": true,
-    "number": 44760,
-    "review_comments_count": 8,
+    "merged": false,
+    "number": 44807,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add Mistral 4",
-    "updated_at": "2026-03-20T10:44:48Z"
+    "title": "docs(quicktour): remove question-answering pipeline from quicktour tables",
+    "updated_at": "2026-03-18T15:38:09Z"
   },
   {
-    "additions": 419,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Follow-up of many related PR, last one in time being https://github.com/huggingface/transformers/pull/44602. This PR completes all the models that may need non-trivial treatment. Only about 30-40 models still have m\u2026",
-    "changed_files": 42,
+    "additions": 23,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR fixes OOM errors when loading models on AMD Strix Halo APUs. ## Problem AMD Strix Halo (Radeon 8060S/8050S) uses unified memory architecture where memory-mapped file loading doesn't work well with the current amdgpu driver. This ca\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44759",
-    "created_at": "2026-03-16T15:38:13Z",
-    "deletions": 983,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44759/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44759",
-    "labels": [],
-    "merged": true,
-    "number": 44759,
-    "review_comments_count": 26,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44806",
+    "created_at": "2026-03-18T03:33:59Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44806/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44806",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44806,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove cache_position in more models (3)",
-    "updated_at": "2026-03-18T13:09:37Z"
+    "title": "fix(core_model_loading): disable mmap on Strix Halo to avoid OOM",
+    "updated_at": "2026-03-18T12:30:21Z"
   },
   {
-    "additions": 825,
-    "author": "LysandreJik",
+    "additions": 137,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "Provides a nicer feedback when `transformers chat` loads a model, instead of hanging https://github.com/user-attachments/assets/8f68f914-b702-4430-b97f-e8cc25326b70 <p>Adds a <code>POST /load_model</code> endpoint to <code>transformers ser\u2026",
-    "changed_files": 7,
+    "body_excerpt": "updates the peft docs: - a more complete training section with a full code snippet, describe saving behavior, resuming from a checkpoint, and distributed training - adds some undocumented API methods (`delete_adapter`, `active_adapters`) -\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44758",
-    "created_at": "2026-03-16T15:02:15Z",
-    "deletions": 63,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44804",
+    "created_at": "2026-03-18T00:08:54Z",
+    "deletions": 89,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44758/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44758",
+    "files_url": "https://github.com/huggingface/transformers/pull/44804/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44804",
     "labels": [],
     "merged": true,
-    "number": 44758,
-    "review_comments_count": 20,
+    "number": 44804,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Propagate the model loading from transformers serve to chat",
-    "updated_at": "2026-03-19T17:20:03Z"
+    "title": "[docs] peft",
+    "updated_at": "2026-03-23T17:14:58Z"
   },
   {
-    "additions": 1,
-    "author": "dacorvo",
+    "additions": 1341,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "## Summary - `_valid_auto_compile_criteria()` gates auto-compilation on `device.type in [\"cuda\", \"xpu\"]`, excluding Neuron devices. This means `torch.compile` never triggers automatically on Neuron even when `StaticCache` is used (which se\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? (Finally) add support for checking+fixing both generated files and modular files in `check_auto_docstrings`. Also `auto_docstring` was recently added to configs, and this PR updates `check_auto_docstrings` to suppor\u2026",
+    "changed_files": 244,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44757",
-    "created_at": "2026-03-16T14:54:38Z",
-    "deletions": 1,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44803",
+    "created_at": "2026-03-17T22:40:45Z",
+    "deletions": 1105,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44757/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44757",
+    "files_url": "https://github.com/huggingface/transformers/pull/44803/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44803",
     "labels": [],
-    "merged": false,
-    "number": 44757,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Add Neuron to auto-compile hardware list",
-    "updated_at": "2026-03-16T15:05:00Z"
+    "merged": true,
+    "number": 44803,
+    "review_comments_count": 24,
+    "state": "closed",
+    "title": "Support Modular (!!) + Configs in `check_auto_docstrings`",
+    "updated_at": "2026-03-24T17:59:12Z"
   },
   {
-    "additions": 4,
-    "author": "zucchini-nlp",
+    "additions": 12,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 6,
+    "body_excerpt": "deepseek_v2, deepseek_v3, and modernbert fix for having incorrect tokenizer class on the hub fixes: https://github.com/huggingface/transformers/issues/44779, https://github.com/huggingface/transformers/pull/44783",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44755",
-    "created_at": "2026-03-16T14:08:34Z",
-    "deletions": 148,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44801",
+    "created_at": "2026-03-17T17:40:25Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44755/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44755",
+    "files_url": "https://github.com/huggingface/transformers/pull/44801/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44801",
     "labels": [],
-    "merged": false,
-    "number": 44755,
+    "merged": true,
+    "number": 44801,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Dont merge, testing smth",
-    "updated_at": "2026-03-18T10:09:15Z"
+    "title": "deepseek_v2, deepseek_v3, and modernbert fix for having incorrect tokenizer class on the hub",
+    "updated_at": "2026-03-19T13:11:54Z"
   },
   {
-    "additions": 20,
-    "author": "gambletan",
+    "additions": 36,
+    "author": "aayushbaluni",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44737 - Added `device=self.device` to all four `torch.arange()` calls in `XLNetModel.relative_positional_encoding()` so that intermediate tensors are created directly on the model's device instead of always on CPU. - With\u2026",
+    "body_excerpt": "## Summary Fixes #44488 `CamembertTokenizer` raised `ValueError: too many values to unpack (expected 2)` when loading models like `cjvt/sleng-bert` that provide vocab as a dict `{token: id}` from `tokenizer.json` (BPE format). The tokenize\u2026",
     "changed_files": 2,
-    "cluster_id": "cluster-44737-6",
-    "cluster_ids": [
-      "cluster-44737-6"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44753",
-    "created_at": "2026-03-16T14:01:08Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44753/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44753",
-    "labels": [],
-    "merged": false,
-    "number": 44753,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: add device= to torch.arange in XLNet relative_positional_encoding",
-    "updated_at": "2026-03-18T13:28:40Z"
-  },
-  {
-    "additions": 100,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR fix? The `flatten()` function in `tokenization_utils_base.py` had a bug where it was checking `arr[0]` instead of `sub_arr` when determining if an element should be recursively flattened. ### Bug Details - **File**: `s\u2026",
-    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44751",
-    "created_at": "2026-03-16T13:40:44Z",
-    "deletions": 29,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44800",
+    "created_at": "2026-03-17T17:20:35Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44751/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44751",
+    "files_url": "https://github.com/huggingface/transformers/pull/44800/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44800",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44751,
+    "number": 44800,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: Correct variable reference in flatten() function",
-    "updated_at": "2026-03-18T13:16:12Z"
+    "title": "fix: handle dict vocab in CamembertTokenizer for tokenizer.json (#44488)",
+    "updated_at": "2026-03-18T15:37:54Z"
   },
   {
-    "additions": 1,
-    "author": "juliendenize",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds `apply_yarn_scaling` as an optional key for yarn repo. This was requested as part of a vLLM PR https://github.com/vllm-project/vllm/pull/37104 that seeks to silence some rope issues when converting Mist\u2026",
-    "changed_files": 1,
+    "additions": 327,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "updates the Hardware section of the docs for training: - combined CPU/Distributed CPU into a single doc - add more info to the Gaudi doc (mixed precision, torch.compile, distributed training) - add more info to the MPS doc (mixed precision\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44747",
-    "created_at": "2026-03-16T10:32:54Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44799",
+    "created_at": "2026-03-17T17:19:51Z",
+    "deletions": 574,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44747/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44747",
+    "files_url": "https://github.com/huggingface/transformers/pull/44799/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44799",
     "labels": [],
-    "merged": false,
-    "number": 44747,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44799,
+    "review_comments_count": 24,
     "state": "closed",
-    "title": "Add apply_yarn_scaling as optional key to yarn",
-    "updated_at": "2026-03-16T12:48:08Z"
+    "title": "[docs] training on specific hardware",
+    "updated_at": "2026-04-13T07:12:08Z"
   },
   {
-    "additions": 202,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed Issue #44737: XLNet relative_positional_encoding function missing device parameter in torch.arange calls.",
-    "changed_files": 11,
+    "additions": 17,
+    "author": "divyanks",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44745",
-    "created_at": "2026-03-16T09:39:30Z",
-    "deletions": 33,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44798",
+    "created_at": "2026-03-17T16:51:46Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44745/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44745",
+    "files_url": "https://github.com/huggingface/transformers/pull/44798/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44798",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44745,
+    "number": 44798,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: add device parameter to torch.arange calls in XLNet",
-    "updated_at": "2026-03-18T13:16:43Z"
+    "title": "Add MPS (Apple Silicon) example and documentation",
+    "updated_at": "2026-03-18T15:37:09Z"
   },
   {
-    "additions": 35,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
+    "additions": 1,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Depends on #44887 and kernels being version `12.3` Works OOB with little changes! Example script for demonstration: ```python from transformers import AutoModelForCausalLM, AutoTokenizer fa_version = 4 #model_id = \"openai/gpt-oss-20b\" mode\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44739",
-    "created_at": "2026-03-16T07:15:33Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44797",
+    "created_at": "2026-03-17T15:35:59Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44739/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44739",
+    "files_url": "https://github.com/huggingface/transformers/pull/44797/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44797",
     "labels": [],
     "merged": true,
-    "number": 44739,
-    "review_comments_count": 12,
+    "number": 44797,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix series of failed test case for janus model",
-    "updated_at": "2026-04-01T08:24:26Z"
+    "title": "[`FA4`] Add kernels fallback",
+    "updated_at": "2026-03-20T19:03:24Z"
   },
   {
-    "additions": 6,
-    "author": "yunhaoli24",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #44031 ## The Problem The condition for calling `_patch_mistral_regex` was too broad (`vocab_size > 100000`), causing non-Mistral models like Qwen, LLaMA, BGE-Reranker to show incorrect regex pattern warnings\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44736",
-    "created_at": "2026-03-16T06:00:47Z",
-    "deletions": 1,
+    "additions": 4522,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR refactors transformers serve so that it is not in a single file. We split it into multiple files with clear responsabilities. There were 2,293 lines initially in the serve.py file. ``` \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44796",
+    "created_at": "2026-03-17T13:04:06Z",
+    "deletions": 3100,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44736/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44736",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44736,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44796/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44796",
+    "labels": [],
+    "merged": true,
+    "number": 44796,
+    "review_comments_count": 40,
     "state": "closed",
-    "title": "fix: resolve false-positive regex warning for non-mistral models",
-    "updated_at": "2026-03-18T15:08:59Z"
+    "title": "[refactor] Serving into proper modules",
+    "updated_at": "2026-04-01T16:03:37Z"
   },
   {
-    "additions": 1,
-    "author": "mango766",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a crash in `transformers serve` when the `/v1/responses` streaming endpoint attempts to reuse a KV cache from a previous request in the same conversation session. ### The bug In `generate_response`, `inputs`\u2026",
-    "changed_files": 1,
+    "additions": 771,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds support for a more generic path, aligned with the rest of the loading! model | PR | main ----|-----|---------- \"gdax/Qwen1.5-MoE-A2.7B_gguf\"| 1min 5s |1min 18s",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44735",
-    "created_at": "2026-03-16T04:09:32Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44735/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44735",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44794",
+    "created_at": "2026-03-17T11:35:33Z",
+    "deletions": 400,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44794/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44794",
     "labels": [],
-    "merged": true,
-    "number": 44735,
+    "merged": false,
+    "number": 44794,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix tensor indexing crash in serve generate_response KV cache continuation",
-    "updated_at": "2026-03-16T15:27:59Z"
+    "state": "open",
+    "title": "Refacto GGUF weight conversion",
+    "updated_at": "2026-03-17T17:03:08Z"
   },
   {
-    "additions": 28,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil, pls help review, thx!",
-    "changed_files": 3,
+    "additions": 40,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44792 - Handles None values in Janus model's image generation mode. The `generate()` method for image generation had several places where it assumed certain config values would always be set, causing failure\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44733",
-    "created_at": "2026-03-16T02:55:54Z",
-    "deletions": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44793",
+    "created_at": "2026-03-17T11:29:47Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44733/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44733",
+    "files_url": "https://github.com/huggingface/transformers/pull/44793/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44793",
     "labels": [],
-    "merged": true,
-    "number": 44733,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix bug and add XPU Expectations for qwen2 and jamba tests",
-    "updated_at": "2026-04-01T08:24:40Z"
+    "merged": false,
+    "number": 44793,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "fix(janus): Handle None values in image generation mode",
+    "updated_at": "2026-03-18T10:43:24Z"
   },
   {
-    "additions": 1,
-    "author": "Defalt-Meh",
+    "additions": 20,
+    "author": "BillionClaw",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? While running SmolVLM tests I noticed this warning in the output: ``` tests/test_video_processing_common.py:57: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider convert\u2026",
+    "body_excerpt": "# What does this PR do? AMD Strix Halo APUs (gfx1151) have a driver bug where safetensors mmap doesn't release memory properly with the unified memory architecture. This causes OOM errors when loading models that should fit in memory (e.g.\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44731",
-    "created_at": "2026-03-15T23:26:31Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44791",
+    "created_at": "2026-03-17T10:33:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44731/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44731",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44791/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44791",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44731,
+    "number": 44791,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[Tests] Fix slow video tensor creation from list of numpy arrays in SmolVLM",
-    "updated_at": "2026-03-15T23:26:31Z"
+    "state": "closed",
+    "title": "fix: disable mmap on Strix Halo APUs to avoid OOM",
+    "updated_at": "2026-03-18T12:33:05Z"
   },
   {
-    "additions": 9,
-    "author": "ydshieh",
+    "additions": 72,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? It's unclear why the config class has `model_type = \"mlcd_vision_model\"` but the model on the hub has \"model_type\": \"mlcd\". This leads to the following failures (load from hub --> save locally --> local locally) ```\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? This patch - adds a simple cache to the model linter so we skip files that did not change and were valid - reworks `Makefile` targets",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44730",
-    "created_at": "2026-03-15T20:44:32Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44790",
+    "created_at": "2026-03-17T08:54:47Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44730/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44730",
+    "files_url": "https://github.com/huggingface/transformers/pull/44790/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44790",
     "labels": [],
     "merged": true,
-    "number": 44730,
-    "review_comments_count": 6,
+    "number": 44790,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix `mlcd` auto config/model/mapping issues",
-    "updated_at": "2026-03-16T12:12:30Z"
+    "title": "feat: added cache to the model linter",
+    "updated_at": "2026-03-24T15:28:29Z"
   },
   {
-    "additions": 214,
-    "author": "xenova",
+    "additions": 21,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR introduces a helper utility function, `int_div_ceil`, which performs `math.ceil(a / b)` for non-negative integer operands. This is necessary as the current approach is both error-prone and imprecise (especia\u2026",
-    "changed_files": 58,
+    "body_excerpt": "# What does this PR do? Some configs from the hub have different types.",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44729",
-    "created_at": "2026-03-15T20:29:38Z",
-    "deletions": 225,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44789",
+    "created_at": "2026-03-17T08:41:30Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44729/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44729",
+    "files_url": "https://github.com/huggingface/transformers/pull/44789/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44789",
     "labels": [],
-    "merged": false,
-    "number": 44729,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Avoid floating point math for ceil operations",
-    "updated_at": "2026-03-15T20:49:34Z"
+    "merged": true,
+    "number": 44789,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Fix config loading issues (type issues)",
+    "updated_at": "2026-03-17T09:44:50Z"
   },
   {
-    "additions": 88,
-    "author": "ajmeese7",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes a GPU memory leak in `Bnb4bitQuantize.convert()` where float16 source tensors are never freed during 4-bit quantized model loading via `from_pretrained`, causing OOM on models whose float16 size exceeds GPU VR\u2026",
-    "changed_files": 2,
+    "additions": 0,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "The pipeline() docstring included an example using the 'question-answering' task, but this task is not in SUPPORTED_TASKS and will raise an error when used. Remove this outdated example to avoid confusing users following the documentation.\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44728",
-    "created_at": "2026-03-15T19:56:44Z",
-    "deletions": 1,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44788",
+    "created_at": "2026-03-17T08:38:25Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44728/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44728",
+    "files_url": "https://github.com/huggingface/transformers/pull/44788/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44788",
     "labels": [],
     "merged": false,
-    "number": 44728,
+    "number": 44788,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix float16 memory leak during 4-bit quantized model loading",
-    "updated_at": "2026-03-16T20:53:54Z"
+    "title": "docs(pipelines): remove outdated question-answering example",
+    "updated_at": "2026-03-23T17:19:33Z"
   },
   {
-    "additions": 202,
-    "author": "LincolnBurrows2017",
+    "additions": 4,
+    "author": "BillionClaw",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed issue where kwargs like force_download, proxies, token were not being passed to cached_file function.",
-    "changed_files": 11,
+    "body_excerpt": "The question-answering pipeline was removed in v5.0.0 per MIGRATION_GUIDE_V5.md, but the non-English task guides still referenced it. This updates the Arabic, Chinese, Japanese, and Korean question answering task guides to remove usage of\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44727",
-    "created_at": "2026-03-15T19:41:24Z",
-    "deletions": 33,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44787",
+    "created_at": "2026-03-17T08:24:09Z",
+    "deletions": 66,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44727/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44727",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44727,
+    "files_url": "https://github.com/huggingface/transformers/pull/44787/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44787",
+    "labels": [],
+    "merged": true,
+    "number": 44787,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: AutoProcessor.from_pretrained not passing kwargs to cached_file",
-    "updated_at": "2026-03-18T13:15:46Z"
+    "title": "docs(tasks): remove references to removed question-answering pipeline",
+    "updated_at": "2026-03-17T16:23:50Z"
   },
   {
-    "additions": 198,
-    "author": "LincolnBurrows2017",
+    "additions": 25,
+    "author": "BillionClaw",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Replaced bare except clause with except Exception in _safe_convert_tensor function to follow Python best practices (PEP 8).",
-    "changed_files": 10,
+    "body_excerpt": "AMD Strix Halo APUs (gfx1151) experience OOM errors when loading large models via safetensors mmap due to unified memory architecture issues. This fix detects Strix Halo GPUs by checking the GPU architecture name (gfx1151) and forces a CPU\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44725",
-    "created_at": "2026-03-15T17:41:18Z",
-    "deletions": 29,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44786",
+    "created_at": "2026-03-17T08:17:32Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44725/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44725",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44786/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44786",
+    "labels": [],
     "merged": false,
-    "number": 44725,
+    "number": 44786,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: replace bare except with Exception in Fuyu image processing",
-    "updated_at": "2026-03-18T13:16:22Z"
-  },
-  {
-    "additions": 6,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? TO be explained.",
-    "changed_files": 5,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44724",
-    "created_at": "2026-03-15T17:14:12Z",
-    "deletions": 5,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44724/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44724",
-    "labels": [],
-    "merged": false,
-    "number": 44724,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Fix some missing / incorrect entries in auto files",
-    "updated_at": "2026-03-16T09:59:56Z"
+    "title": "fix(core_model_loading): disable mmap on Strix Halo to avoid OOM",
+    "updated_at": "2026-03-17T10:29:44Z"
   },
   {
-    "additions": 12,
-    "author": "aashirpersonal",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary This PR fixes #44716 by exposing and forwarding `interpolate_pos_encoding` through the Pixio embedding/model call chain so the option is actually usable from `PixioModel.forward()`. ### Changes - Added `interpolate_pos_encoding:\u2026",
-    "changed_files": 2,
+    "additions": 307,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "AMD Strix Halo APUs (e.g., Radeon 8060S) have issues with mmap-based tensor loading from safetensors, causing out-of-memory errors even when sufficient memory is available. This fix: - Adds `is_strix_halo()` helper to detect Strix Halo GPU\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44723",
-    "created_at": "2026-03-15T16:52:03Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44785",
+    "created_at": "2026-03-17T06:55:31Z",
+    "deletions": 83,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44723/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44723",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44785/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44785",
+    "labels": [],
     "merged": false,
-    "number": 44723,
+    "number": 44785,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel",
-    "updated_at": "2026-03-18T15:05:52Z"
+    "title": "fix(model_loading): Disable mmap on Strix Halo to avoid OOM",
+    "updated_at": "2026-03-17T10:28:06Z"
   },
   {
-    "additions": 38,
-    "author": "chandan11248",
+    "additions": 2,
+    "author": "BillionClaw",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Migrates the GPT-J model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as described in #43979. ### Changes - Added `_can_record_outputs` to `GPTJPreTrained\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
+    "body_excerpt": "This PR fixes the DeepSeek tokenizer issue where spaces were lost during decoding in Transformers v5. ## Problem DeepSeek V2 and V3 models use SentencePiece tokenization (like Llama) but were falling back to the generic TokenizersBackend i\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44722",
-    "created_at": "2026-03-15T15:33:25Z",
-    "deletions": 110,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44783",
+    "created_at": "2026-03-17T05:58:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44722/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44722",
+    "files_url": "https://github.com/huggingface/transformers/pull/44783/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44783",
     "labels": [],
     "merged": false,
-    "number": 44722,
+    "number": 44783,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Refactor gptj output tracing to use standardized decorators",
-    "updated_at": "2026-03-19T18:12:59Z"
+    "title": "fix(auto): Map deepseek_v2 and deepseek_v3 to LlamaTokenizer",
+    "updated_at": "2026-03-17T11:12:52Z"
   },
   {
-    "additions": 4,
-    "author": "rsmed31",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44716 `PixioPatchEmbeddings.forward` already accepted `interpolate_pos_encoding` but it was silently dropped \u2014 never passed from `PixioEmbeddings.forward` or `PixioModel.forward`, making the parameter effectively unusable\u2026",
+    "additions": 6,
+    "author": "JiwaniZakir",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #44737 `XLNetModel.relative_positional_encoding` was creating all `torch.arange` tensors on CPU by default, then calling `.to(output_h.device)` at the call site to move them. Adds a `device` parameter to `relative_positional_encoding\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44718",
-    "created_at": "2026-03-14T23:57:14Z",
-    "deletions": 3,
+    "cluster_id": "cluster-44737-6",
+    "cluster_ids": [
+      "cluster-44737-6"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44782",
+    "created_at": "2026-03-17T05:11:36Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44718/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44718",
+    "files_url": "https://github.com/huggingface/transformers/pull/44782/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44782",
     "labels": [],
-    "merged": false,
-    "number": 44718,
+    "merged": true,
+    "number": 44782,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel",
-    "updated_at": "2026-03-15T17:58:58Z"
+    "title": "fix: XLNet: relative_positional_encoding computes on CPU every forward",
+    "updated_at": "2026-03-19T13:30:48Z"
   },
   {
-    "additions": 15,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As discussed internally, some component model classes didn't specify the correct config classes. This PR fixes them (those I could found - because the tiny model creation script fails due to those mistakes).",
-    "changed_files": 7,
+    "additions": 5,
+    "author": "bensons",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Some model repos provide `extra_special_tokens` as a list in their tokenizer_config.json, which caused an `AttributeError: 'list' object has no attribute 'keys'`. This converts list inputs to a dict mapping each tok\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44715",
-    "created_at": "2026-03-14T21:11:52Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44781",
+    "created_at": "2026-03-17T04:59:02Z",
+    "deletions": 2849,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44715/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44715",
+    "files_url": "https://github.com/huggingface/transformers/pull/44781/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44781",
     "labels": [],
-    "merged": true,
-    "number": 44715,
+    "merged": false,
+    "number": 44781,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix missing / incorrect `config` class in some model class definitions",
-    "updated_at": "2026-03-15T11:19:51Z"
+    "state": "open",
+    "title": "Fix `_set_model_specific_special_tokens` to accept list-format `extra_special_tokens`",
+    "updated_at": "2026-03-27T23:19:21Z"
   },
   {
-    "additions": 181,
+    "additions": 145,
     "author": "LincolnBurrows2017",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating from core config to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but `text_config` still has default\u2026",
+    "body_excerpt": "Fixed logic error in is_tiktoken_available function. The original code `return with_blobfile and _is_package_available(\"blobfile\")[0] or True` would always return True due to operator precedence.",
     "changed_files": 8,
-    "cluster_id": "cluster-44625-9",
-    "cluster_ids": [
-      "cluster-44625-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44714",
-    "created_at": "2026-03-14T20:42:46Z",
-    "deletions": 26,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44778",
+    "created_at": "2026-03-16T23:41:29Z",
+    "deletions": 28,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44714/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44714",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44778/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44778",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44714,
+    "number": 44778,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: propagate num_labels to text_config for Qwen models",
-    "updated_at": "2026-03-18T12:56:27Z"
+    "title": "fix: correct logic error in is_tiktoken_available function",
+    "updated_at": "2026-03-18T13:15:37Z"
   },
   {
-    "additions": 15,
-    "author": "kulkarni-rohan",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Applies the output tracing refactor to ColQwen2ForRetrieval as part of the broader effort tracked in issue #43979 to modernize output handling across all models in the library. Changes in both modular_colqwen2.py and modeling_colqwen2.py:\u2026",
-    "changed_files": 2,
+    "additions": 35,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "adds docs for #43705 (enable bidirectional attention for decoder-only models)",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44713",
-    "created_at": "2026-03-14T20:20:14Z",
-    "deletions": 28,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44777",
+    "created_at": "2026-03-16T21:58:40Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44713/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44713",
+    "files_url": "https://github.com/huggingface/transformers/pull/44777/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44777",
     "labels": [],
-    "merged": false,
-    "number": 44713,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[ColQwen2] Refactor output tracing (issue #43979)",
-    "updated_at": "2026-03-14T20:21:24Z"
+    "merged": true,
+    "number": 44777,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[docs] is_causal feature",
+    "updated_at": "2026-03-17T19:50:43Z"
   },
   {
-    "additions": 2,
-    "author": "ydshieh",
+    "additions": 0,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? torch 2.11 is going to be released soon, but we still use 2.9. Let's update it to 2.10 so at least a run with torch 2.10, before we update to torch 2.11 later.",
+    "body_excerpt": "the doc-builder is breaking because it can't find `Mistral4ForQuestionAnswering`, which looks like it doesn't exist",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44712",
-    "created_at": "2026-03-14T20:18:01Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44776",
+    "created_at": "2026-03-16T20:43:33Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44712/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44712",
+    "files_url": "https://github.com/huggingface/transformers/pull/44776/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44776",
     "labels": [],
     "merged": true,
-    "number": 44712,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Update Nvidia CI docker file to use torch 2.10",
-    "updated_at": "2026-03-14T20:29:04Z"
-  },
-  {
-    "additions": 339,
-    "author": "anuq",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #35141. When `tie_word_embeddings=False`, calling `resize_token_embeddings()` creates a new `nn.Linear` for the LM head via `_get_resized_lm_head()`. The new module's weight and bias tensors do **not** carry\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44711",
-    "created_at": "2026-03-14T19:21:21Z",
-    "deletions": 205,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44711/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44711",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44711,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: mark new lm_head params as `_is_hf_initialized` after `resize_token_embeddings`",
-    "updated_at": "2026-03-20T13:36:58Z"
-  },
-  {
-    "additions": 12,
-    "author": "he-yufeng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes `AutoProcessor.from_pretrained` silently dropping hub kwargs like `force_download`, `cache_dir`, `token`, `revision`, etc. ### The bug The existing code on line ~300 filters kwargs using `inspect.signature(ca\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44710",
-    "created_at": "2026-03-14T18:33:53Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44710/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44710",
-    "labels": [],
-    "merged": true,
-    "number": 44710,
+    "number": 44776,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AutoProcessor.from_pretrained silently dropping hub kwargs",
-    "updated_at": "2026-03-25T18:13:14Z"
+    "title": "[fix] mistral 4 docs",
+    "updated_at": "2026-03-16T21:11:29Z"
   },
   {
-    "additions": 6778,
-    "author": "LucasMa2025",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# \ud83c\udf9b\ufe0f Add Configurable Generation Scheduler and State Machine for `generate()` ## Summary This PR introduces a **fully optional, zero-intrusion** Generation Scheduler (`GenerationScheduler`) and explicit state machine (`GenerationStateMachi\u2026",
-    "changed_files": 15,
+    "additions": 177,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "refactors the current [Parallelism methods](https://huggingface.co/docs/transformers/main/en/perf_train_gpu_many#zero-data-parallelism-pipeline-parallelism-and-model-parallelism-3d-parallelism) doc to: - focus on practical examples of comb\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44708",
-    "created_at": "2026-03-14T17:13:34Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44775",
+    "created_at": "2026-03-16T20:23:29Z",
+    "deletions": 109,
     "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44708/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44708",
+    "files_url": "https://github.com/huggingface/transformers/pull/44775/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44775",
     "labels": [],
     "merged": false,
-    "number": 44708,
+    "number": 44775,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add Configurable Generation Scheduler and State Machine for `generate()`",
-    "updated_at": "2026-03-14T19:19:11Z"
+    "state": "open",
+    "title": "[docs] n-d parallelism",
+    "updated_at": "2026-03-16T20:28:48Z"
   },
   {
-    "additions": 3,
-    "author": "saivedant169",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `MptForCausalLM.forward()` and `MptModel.forward()`, bringing MPT in line with other CausalLM models. Same rationale as the Bloom PR (#44706) \u2014 M\u2026",
-    "changed_files": 1,
+    "additions": 0,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Remove `is_causal` from `EuroBertConfig`",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44707",
-    "created_at": "2026-03-14T17:12:16Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44774",
+    "created_at": "2026-03-16T18:56:19Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44707/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44707",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44707,
+    "files_url": "https://github.com/huggingface/transformers/pull/44774/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44774",
+    "labels": [],
+    "merged": true,
+    "number": 44774,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add position_ids to MptForCausalLM forward pass",
-    "updated_at": "2026-03-18T13:39:36Z"
+    "title": "Remove `is_causal` from `EuroBertConfig`",
+    "updated_at": "2026-03-17T09:33:21Z"
   },
   {
     "additions": 3,
-    "author": "saivedant169",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `BloomForCausalLM.forward()` and `BloomModel.forward()`, bringing Bloom in line with other CausalLM models like Llama, Falcon, Gemma, and Mistral\u2026",
-    "changed_files": 1,
+    "author": "githubnemo",
+    "author_association": "MEMBER",
+    "body_excerpt": "The links to the quantization offloading were outdated and 4-bit quantization also supports offloading which should be mentioned. cc @SunMarc",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44706",
-    "created_at": "2026-03-14T17:09:11Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44772",
+    "created_at": "2026-03-16T18:46:13Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44706/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44706",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44772/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44772",
+    "labels": [],
     "merged": false,
-    "number": 44706,
+    "number": 44772,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add position_ids to BloomForCausalLM forward pass",
-    "updated_at": "2026-03-18T13:39:51Z"
+    "state": "open",
+    "title": "bitsandbytes: Update links and docs",
+    "updated_at": "2026-03-17T15:57:56Z"
   },
   {
-    "additions": 14,
-    "author": "saivedant169",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes part of #32937 ## What does this PR do? RoFormer introduced rotary position embeddings, but its `ForCausalLM` forward method doesn't accept `position_ids` \u2014 which means callers can't specify custom positions for packed sequences or f\u2026",
+    "additions": 2,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? wtf",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44705",
-    "created_at": "2026-03-14T16:48:06Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44771",
+    "created_at": "2026-03-16T18:45:11Z",
     "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44705/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44705",
-    "labels": [
-      "Code agent slop"
-    ],
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44771/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44771",
+    "labels": [],
     "merged": false,
-    "number": 44705,
+    "number": 44771,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add position_ids to RoFormerForCausalLM forward pass",
-    "updated_at": "2026-03-18T13:40:05Z"
+    "state": "open",
+    "title": "wtf",
+    "updated_at": "2026-03-16T18:56:00Z"
   },
   {
-    "additions": 26,
-    "author": "vasqu",
+    "additions": 203,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title, it seems that the `cute` subfolder can be even distributed if you only install FA2 which implies something wrong. Now we check under the (normalized) distribution names",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Fix tests failing because of `strict` type validation and decorate two missing configs, Nemotron and VibeVoice",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44703",
-    "created_at": "2026-03-14T14:46:02Z",
-    "deletions": 10,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44770",
+    "created_at": "2026-03-16T18:44:03Z",
+    "deletions": 268,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44703/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44703",
+    "files_url": "https://github.com/huggingface/transformers/pull/44770/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44770",
     "labels": [],
     "merged": true,
-    "number": 44703,
+    "number": 44770,
     "review_comments_count": 1,
     "state": "closed",
-    "title": "[`FA`] Fix fa detection",
-    "updated_at": "2026-03-14T17:19:07Z"
+    "title": "Fix configs with `@strict`",
+    "updated_at": "2026-03-17T15:39:43Z"
   },
   {
-    "additions": 148,
+    "additions": 145,
     "author": "LincolnBurrows2017",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR fix? The `rms_norm_eps` parameter in `MistralConfig` was incorrectly typed as `int | None` but defaults to `1e-6` which is a float. This parameter is passed to `MistralRMSNorm` which expects `eps: float`. ### Bug Detai\u2026",
+    "body_excerpt": "## Summary The `is_batched_video()` and `convert_pil_frames_to_video()` functions in `src/transformers/video_utils.py` were accessing `videos[0]` without first checking if the list is empty, causing `IndexError` when empty lists are passed\u2026",
     "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44702",
-    "created_at": "2026-03-14T14:41:15Z",
-    "deletions": 25,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44769",
+    "created_at": "2026-03-16T18:40:07Z",
+    "deletions": 28,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44702/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44702",
+    "files_url": "https://github.com/huggingface/transformers/pull/44769/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44769",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44702,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: Correct rms_norm_eps type hint from int to float in MistralConfig",
-    "updated_at": "2026-03-18T13:00:12Z"
-  },
-  {
-    "additions": 219,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "These models have `base_model_pp_plan`s but currently do not work because the base model's forward pass depends on all the `layers` being `Qwen2VLDecoderLayer`. i.e. if one of the layers is removed/replaced with `Identity`, `decoder_layer.\u2026",
-    "changed_files": 52,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44699",
-    "created_at": "2026-03-14T11:44:24Z",
-    "deletions": 148,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44699/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44699",
-    "labels": [],
-    "merged": true,
-    "number": 44699,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix several based models' pipeline parallel support",
-    "updated_at": "2026-03-20T13:53:27Z"
-  },
-  {
-    "additions": 1,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "The typo in the `elif` chain meant that `image` and `video` modalidty encoders could not be set using this method. This PR fixes the typo so that they can.",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44698",
-    "created_at": "2026-03-14T11:18:54Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44698/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44698",
-    "labels": [],
-    "merged": true,
-    "number": 44698,
+    "number": 44769,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `set_encoder`",
-    "updated_at": "2026-03-14T13:42:00Z"
-  },
-  {
-    "additions": 75,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description The `torch_float` function in `src/transformers/utils/generic.py` was incorrectly returning `int(x)` in two places where it should return `float(x)`: 1. When torch is not available (fallback case) 2. When not in a tracing co\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44697",
-    "created_at": "2026-03-14T10:44:12Z",
-    "deletions": 25,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44697/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44697",
-    "labels": [],
-    "merged": false,
-    "number": 44697,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "fix: torch_float should return float, not int",
-    "updated_at": "2026-03-17T19:29:02Z"
+    "title": "Fix: Handle empty lists in video_utils functions",
+    "updated_at": "2026-03-18T13:15:55Z"
   },
   {
-    "additions": 19,
-    "author": "hmellor",
+    "additions": 20,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "In configs, `base_model_pp_plan` and `base_model_tp_plan` default to `None` In models, `_pp_plan` and `_tp_plan` _look like_ they default to `None` based on the class variables, but will actually always be a dict because of `post_init`. Th\u2026",
+    "body_excerpt": "The function `add_tensor_parallel_hooks_to_module` has unused parameters, in this PR we: - Remove `tp_plan`, which is not used. - Remove `parameter_name` which is not used - Remove `layer_name`. This parameter is only used for logging purp\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44696",
-    "created_at": "2026-03-14T09:41:07Z",
-    "deletions": 13,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44696/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44696",
-    "labels": [],
-    "merged": true,
-    "number": 44696,
-    "review_comments_count": 5,
-    "state": "closed",
-    "title": "Fix `supports_{tp/pp}_plan`",
-    "updated_at": "2026-03-31T13:12:56Z"
-  },
-  {
-    "additions": 4,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Kyutai Speech-To-Text**: [The PR [processors] Unbloating simple processors](https://github.com/huggingface/transformers/pull/40377), [refactore\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44695",
-    "created_at": "2026-03-14T09:05:35Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44695/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44695",
-    "labels": [],
-    "merged": true,
-    "number": 44695,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "fix(testing): Fix Kyutai Speech-To-Text and LongCatFlash test failures on main CI",
-    "updated_at": "2026-04-09T15:41:05Z"
-  },
-  {
-    "additions": 143,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagated from core config to text config. When loading `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the outer config gets `num_labels=1` but the inner `text_config` still ha\u2026",
-    "changed_files": 7,
-    "cluster_id": "cluster-44625-9",
-    "cluster_ids": [
-      "cluster-44625-9"
-    ],
-    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44693",
-    "created_at": "2026-03-14T05:43:00Z",
-    "deletions": 30,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44693/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44693",
-    "labels": [],
-    "merged": false,
-    "number": 44693,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: Propagate num_labels to text_config in Qwen3.5",
-    "updated_at": "2026-03-18T12:56:25Z"
-  },
-  {
-    "additions": 18,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44514. `Qwen2_5_VLProcessor.apply_chat_template` crashes with `ValueError` when called with batched input and `padding=False` (the default). The root cause is `np.array(text_inputs[\"input_ids\"])` which fails when sequence\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44514-8",
-    "cluster_ids": [
-      "cluster-44514-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44692",
-    "created_at": "2026-03-14T04:14:38Z",
-    "deletions": 10,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44692/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44692",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44692,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: handle ragged input_ids in Qwen2_5_VLProcessor.apply_chat_template",
-    "updated_at": "2026-03-18T12:44:18Z"
-  },
-  {
-    "additions": 23,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Fixes `num_labels` (and `id2label`/`label2id`) not being propagated from the outer `Qwen3_5Config` to its inner `text_config` when passed via `AutoConfig.from_pretrained(..., num_labels=1)`. - When `text_config` is `None` or a\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44625-9",
-    "cluster_ids": [
-      "cluster-44625-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44691",
-    "created_at": "2026-03-14T04:10:54Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44691/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44691",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44691,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix Qwen3.5 num_labels not propagated to text_config",
-    "updated_at": "2026-03-18T12:57:19Z"
-  },
-  {
-    "additions": 6,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44360 The `GlmMoeDsaIndexer` is missing a ReLU activation on the per-head dot-product scores before the weighted sum across heads. The reference DeepSeek V3.2 implementation applies ReLU inside the `fp8_index` kernel: ```\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44360-6",
-    "cluster_ids": [
-      "cluster-44360-6"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44690",
-    "created_at": "2026-03-14T03:44:37Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44690/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44690",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44690,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix missing ReLU in GLM-MOE-DSA indexer scoring",
-    "updated_at": "2026-03-18T12:40:23Z"
-  },
-  {
-    "additions": 141,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but text_config still has default `num_labels=2`. Thi\u2026",
-    "changed_files": 6,
-    "cluster_id": "cluster-44625-9",
-    "cluster_ids": [
-      "cluster-44625-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44688",
-    "created_at": "2026-03-14T00:40:50Z",
-    "deletions": 23,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44688/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44688",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44688,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: Propagate num_labels to text_config in Qwen models",
-    "updated_at": "2026-03-18T12:56:41Z"
-  },
-  {
-    "additions": 8,
-    "author": "vxa8502",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes partial #32937 Adds explicit `position_ids` threading through GPT-Neo's attention layers to enable flash attention's packed sequence optimization. ## Context GPT-Neo uses learned absolute position embeddings (`wpe`) applied at the mo\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44687",
-    "created_at": "2026-03-13T23:28:55Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44768",
+    "created_at": "2026-03-16T18:29:52Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44687/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44687",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44687,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44768/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44768",
+    "labels": [],
+    "merged": true,
+    "number": 44768,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Add explicit position_ids to GPT-Neo attention layers",
-    "updated_at": "2026-03-18T13:06:49Z"
+    "title": "Remove unused parameters and improve add_tensor_parallel_hooks_t\u2026",
+    "updated_at": "2026-04-09T17:11:55Z"
   },
   {
-    "additions": 615,
-    "author": "tejasae-afk",
-    "author_association": "NONE",
-    "body_excerpt": "During an automated code review of src/transformers/models/marian/convert_marian_to_pytorch.py, the following issue was identified. Use safe_load in convert marian to pytorch. yaml.load on untrusted input can construct arbitrary Python obj\u2026",
-    "changed_files": 80,
+    "additions": 11,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? EuroBertConfig was missing `@strict(accept_kwargs=True)` unlike its parent LlamaConfig, causing failures when reloading saved configs that include extra keys like `architectures`. Also fixed the test helper passing\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44686",
-    "created_at": "2026-03-13T21:22:07Z",
-    "deletions": 259,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44767",
+    "created_at": "2026-03-16T17:31:26Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44686/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44686",
+    "files_url": "https://github.com/huggingface/transformers/pull/44767/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44767",
     "labels": [],
-    "merged": false,
-    "number": 44686,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44767,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Use safe_load in convert marian to pytorch",
-    "updated_at": "2026-03-14T03:54:31Z"
+    "title": "Fix: Eurobert model was missing @strict decorator and invalid test kwargs",
+    "updated_at": "2026-03-16T19:02:31Z"
   },
   {
-    "additions": 10,
-    "author": "ydshieh",
+    "additions": 26,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? For tiny model creation script - new added model test files still miss this argument ...",
-    "changed_files": 3,
+    "body_excerpt": "for when remote code tries to import from `tokenization_xxx_fast`",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44685",
-    "created_at": "2026-03-13T20:53:41Z",
-    "deletions": 3,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44766",
+    "created_at": "2026-03-16T17:30:23Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44685/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44685",
+    "files_url": "https://github.com/huggingface/transformers/pull/44766/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44766",
     "labels": [],
     "merged": true,
-    "number": 44685,
+    "number": 44766,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix more model tester missing `parent` issue",
-    "updated_at": "2026-03-13T21:03:46Z"
+    "title": "support xxxFast alias in v5 tokenizers",
+    "updated_at": "2026-03-18T13:40:05Z"
   },
   {
-    "additions": 41,
-    "author": "ntenenz",
+    "additions": 19,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026 # What does this PR do? In torch versions >= 2.9.0, it requests the lse from flex_attenetion using `AuxRequest` instead of the deprecated `return_lse`, which triggers a warning and can break tracing. Fixes #44683 ## Before submitting - [\u2026",
-    "changed_files": 1,
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **PaliGemma 2:** The [PaliGemma 1 test class](https://github.com/huggingface/transformers/blob/main/tests/models/paligemma/test_modeling_paligemm\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44684",
-    "created_at": "2026-03-13T20:16:35Z",
-    "deletions": 5,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44765",
+    "created_at": "2026-03-16T17:26:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44684/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44684",
+    "files_url": "https://github.com/huggingface/transformers/pull/44765/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44765",
     "labels": [],
     "merged": true,
-    "number": 44684,
-    "review_comments_count": 8,
+    "number": 44765,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "update flex attention to use `return_aux` instead of `return_lse` when torch verison >= 2.9",
-    "updated_at": "2026-03-18T11:44:18Z"
+    "title": "fix(testing): Fix PaliGemma 2 and PaddleOCR-VL test failures on main",
+    "updated_at": "2026-03-20T13:55:55Z"
   },
   {
-    "additions": 301,
-    "author": "SunMarc",
+    "additions": 12,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Llama cpp integration in transformers serve. Minor changes to add llama.cpp integration Mostly changes on serve to fix latency for streaming and non streaming",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Fixes the siglip import. that was also crashing the test fetcher",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44682",
-    "created_at": "2026-03-13T18:52:41Z",
-    "deletions": 73,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44764",
+    "created_at": "2026-03-16T17:15:40Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44682/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44682",
+    "files_url": "https://github.com/huggingface/transformers/pull/44764/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44764",
     "labels": [],
-    "merged": false,
-    "number": 44682,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "transformers serve + llamacpp",
-    "updated_at": "2026-03-14T07:05:29Z"
+    "merged": true,
+    "number": 44764,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix: sig lip import",
+    "updated_at": "2026-03-16T17:38:41Z"
   },
   {
-    "additions": 47,
-    "author": "dacorvo",
+    "additions": 17,
+    "author": "xenova",
     "author_association": "MEMBER",
-    "body_excerpt": "Fixes #44679 ## Summary - Custom attention kernels registered via `load_and_register_attn_kernel` currently get hardcoded `flash_attention_2` mask dispatch, which produces 2D or `None` masks - Kernels that need SDPA-style 4D boolean masks\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Adds support for MLP mixers, used by [nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16). Previously, it would crash because it would not recognize the `-` char in t\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44680",
-    "created_at": "2026-03-13T17:55:54Z",
-    "deletions": 1,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44763",
+    "created_at": "2026-03-16T17:04:36Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44680/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44680",
+    "files_url": "https://github.com/huggingface/transformers/pull/44763/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44763",
     "labels": [],
     "merged": false,
-    "number": 44680,
-    "review_comments_count": 12,
-    "state": "open",
-    "title": "Allow kernel modules to declare their preferred mask function",
-    "updated_at": "2026-03-19T11:27:09Z"
+    "number": 44763,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[nemotron_h] Add support for MLP mixers",
+    "updated_at": "2026-04-14T13:46:14Z"
   },
   {
-    "additions": 9,
-    "author": "JokeYoonic",
+    "additions": 4,
+    "author": "BillionClaw",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Problem: - On macOS ARM64 + Python 3.13 + transformers 5.x, GPT-2 model's lm_head forward pass produces NaN/Inf values during inference - Root cause: lm_head.weight is tied to transformer.wte.weight, and the shared memory reference causes\u2026",
+    "body_excerpt": "XLNet.relative_positional_encoding creates intermediate tensors on CPU every forward pass because torch.arange was missing the device parameter. This causes unnecessary CPU-GPU transfers when running on CUDA. Added device=self.device to al\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44676",
-    "created_at": "2026-03-13T16:28:01Z",
-    "deletions": 2,
+    "cluster_id": "cluster-44737-6",
+    "cluster_ids": [
+      "cluster-44737-6"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44762",
+    "created_at": "2026-03-16T16:17:54Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44676/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44676",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44762/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44762",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44676,
+    "number": 44762,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix(gpt2): Resolve NaN/Inf issue in lm_head on Python 3.13 with tied weights",
-    "updated_at": "2026-03-18T17:16:49Z"
+    "state": "closed",
+    "title": "fix: Cache XLNet relative_positional_encoding to avoid CPU computation",
+    "updated_at": "2026-03-18T15:16:14Z"
   },
   {
-    "additions": 32,
-    "author": "stevhliu",
+    "additions": 152,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "properly formats the `ContinuousBatchingConfig` below: <img width=\"976\" height=\"626\" alt=\"Screenshot 2026-03-13 at 9 09 39 AM\" src=\"https://github.com/user-attachments/assets/4390c6f7-bb63-4039-a46e-9f4ae23f5d98\" />",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This adds rule 10: ``` Direct config definitions must use @strict(accept_kwargs=True). ```",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44675",
-    "created_at": "2026-03-13T16:10:28Z",
-    "deletions": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44761",
+    "created_at": "2026-03-16T16:05:03Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44675/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44675",
+    "files_url": "https://github.com/huggingface/transformers/pull/44761/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44761",
     "labels": [],
     "merged": true,
-    "number": 44675,
-    "review_comments_count": 0,
+    "number": 44761,
+    "review_comments_count": 7,
     "state": "closed",
-    "title": "[docs] cb config",
-    "updated_at": "2026-03-13T23:15:04Z"
+    "title": "model-linter: Added rule 10",
+    "updated_at": "2026-03-17T08:52:19Z"
   },
   {
-    "additions": 408,
-    "author": "Rocketknight1",
+    "additions": 2090,
+    "author": "juliendenize",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44760",
+    "created_at": "2026-03-16T15:54:11Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44760/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44760",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 44760,
+    "review_comments_count": 8,
+    "state": "closed",
+    "title": "Add Mistral 4",
+    "updated_at": "2026-03-20T10:44:48Z"
+  },
+  {
+    "additions": 419,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "We've had `parse_response()` in the library for a while, but it's been a soft launch / prototype feature. This PR cleans it up and documents it, making it an official feature! The API is largely unchanged from the prototype, but we drop `x\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? Follow-up of many related PR, last one in time being https://github.com/huggingface/transformers/pull/44602. This PR completes all the models that may need non-trivial treatment. Only about 30-40 models still have m\u2026",
+    "changed_files": 42,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44674",
-    "created_at": "2026-03-13T15:41:42Z",
-    "deletions": 34,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44759",
+    "created_at": "2026-03-16T15:38:13Z",
+    "deletions": 983,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44674/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44674",
+    "files_url": "https://github.com/huggingface/transformers/pull/44759/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44759",
     "labels": [],
     "merged": true,
-    "number": 44674,
-    "review_comments_count": 11,
+    "number": 44759,
+    "review_comments_count": 26,
     "state": "closed",
-    "title": "Officially launch parse_response",
-    "updated_at": "2026-03-24T15:55:05Z"
+    "title": "Remove cache_position in more models (3)",
+    "updated_at": "2026-03-18T13:09:37Z"
   },
   {
-    "additions": 73,
-    "author": "remi-or",
+    "additions": 825,
+    "author": "LysandreJik",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR fixes a bug in continuous batching where non-CUDA devices cannot use the feature because some CUDA-exclusive objects are always instantiated. It also adds a test to make sure this will not break again in the future.",
-    "changed_files": 3,
+    "body_excerpt": "Provides a nicer feedback when `transformers chat` loads a model, instead of hanging https://github.com/user-attachments/assets/8f68f914-b702-4430-b97f-e8cc25326b70 <p>Adds a <code>POST /load_model</code> endpoint to <code>transformers ser\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44673",
-    "created_at": "2026-03-13T15:37:01Z",
-    "deletions": 15,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44758",
+    "created_at": "2026-03-16T15:02:15Z",
+    "deletions": 63,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44673/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44673",
+    "files_url": "https://github.com/huggingface/transformers/pull/44758/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44758",
     "labels": [],
     "merged": true,
-    "number": 44673,
-    "review_comments_count": 0,
+    "number": 44758,
+    "review_comments_count": 20,
     "state": "closed",
-    "title": "[CB] [Bug] Fix crashes when running without cuda",
-    "updated_at": "2026-03-15T23:59:55Z"
+    "title": "Propagate the model loading from transformers serve to chat",
+    "updated_at": "2026-03-19T17:20:03Z"
   },
   {
     "additions": 1,
-    "author": "neo",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "author": "dacorvo",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - `_valid_auto_compile_criteria()` gates auto-compilation on `device.type in [\"cuda\", \"xpu\"]`, excluding Neuron devices. This means `torch.compile` never triggers automatically on Neuron even when `StaticCache` is used (which se\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44672",
-    "created_at": "2026-03-13T15:33:15Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44757",
+    "created_at": "2026-03-16T14:54:38Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44672/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44672",
+    "files_url": "https://github.com/huggingface/transformers/pull/44757/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44757",
     "labels": [],
-    "merged": true,
-    "number": 44672,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix annotations reader for python 3.14 in `PreTrainedModel`",
-    "updated_at": "2026-03-19T13:30:48Z"
+    "merged": false,
+    "number": 44757,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Add Neuron to auto-compile hardware list",
+    "updated_at": "2026-04-14T12:43:21Z"
   },
   {
-    "additions": 6,
-    "author": "dacorvo",
+    "additions": 4,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "Fixes #44678 ## Summary - Replace advanced indexing (`self.gate_up_proj[expert_ids]`) with explicit `torch.index_select(self.gate_up_proj, 0, expert_ids)` in `batched_mm_experts_forward` - 6 replacements total (3 weight tensors + 3 bias te\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44669",
-    "created_at": "2026-03-13T14:52:22Z",
-    "deletions": 6,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44755",
+    "created_at": "2026-03-16T14:08:34Z",
+    "deletions": 148,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44669/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44669",
+    "files_url": "https://github.com/huggingface/transformers/pull/44755/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44755",
     "labels": [],
     "merged": false,
-    "number": 44669,
+    "number": 44755,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Use `index_select` instead of advanced indexing in `batched_mm_experts_forward`",
-    "updated_at": "2026-03-19T13:39:23Z"
+    "title": "Dont merge, testing smth",
+    "updated_at": "2026-03-18T10:09:15Z"
   },
   {
-    "additions": 18,
-    "author": "dacorvo",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes #44677 ## Summary - Add `base_model_tp_plan` to `OlmoeConfig`, enabling `from_pretrained(tp_plan=\"auto\")` for OLMoE models - Add `TensorParallelTesterMixin` to OLMoE tests for TP validation coverage - Uses `\"colwise\"` for `q_norm` an\u2026",
+    "additions": 20,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44737 - Added `device=self.device` to all four `torch.arange()` calls in `XLNetModel.relative_positional_encoding()` so that intermediate tensors are created directly on the model's device instead of always on CPU. - With\u2026",
     "changed_files": 2,
+    "cluster_id": "cluster-44737-6",
+    "cluster_ids": [
+      "cluster-44737-6"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44753",
+    "created_at": "2026-03-16T14:01:08Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44753/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44753",
+    "labels": [],
+    "merged": false,
+    "number": 44753,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: add device= to torch.arange in XLNet relative_positional_encoding",
+    "updated_at": "2026-03-18T13:28:40Z"
+  },
+  {
+    "additions": 100,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR fix? The `flatten()` function in `tokenization_utils_base.py` had a bug where it was checking `arr[0]` instead of `sub_arr` when determining if an element should be recursively flattened. ### Bug Details - **File**: `s\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44668",
-    "created_at": "2026-03-13T14:45:22Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44751",
+    "created_at": "2026-03-16T13:40:44Z",
+    "deletions": 29,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44668/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44668",
-    "labels": [],
-    "merged": true,
-    "number": 44668,
-    "review_comments_count": 4,
+    "files_url": "https://github.com/huggingface/transformers/pull/44751/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44751",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44751,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add `base_model_tp_plan` to `OlmoeConfig`",
-    "updated_at": "2026-03-26T13:58:58Z"
+    "title": "Fix: Correct variable reference in flatten() function",
+    "updated_at": "2026-03-18T13:16:12Z"
   },
   {
-    "additions": 412,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? A few find-and-replaces and cache position is deleted from the rest of models. Still have to check docs and test files, so WIP",
-    "changed_files": 120,
+    "additions": 1,
+    "author": "juliendenize",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds `apply_yarn_scaling` as an optional key for yarn repo. This was requested as part of a vLLM PR https://github.com/vllm-project/vllm/pull/37104 that seeks to silence some rope issues when converting Mist\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44667",
-    "created_at": "2026-03-13T14:37:26Z",
-    "deletions": 1519,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44747",
+    "created_at": "2026-03-16T10:32:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44667/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44667",
+    "files_url": "https://github.com/huggingface/transformers/pull/44747/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44747",
     "labels": [],
     "merged": false,
-    "number": 44667,
+    "number": 44747,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Goodbye cache position",
-    "updated_at": "2026-03-19T11:55:04Z"
+    "title": "Add apply_yarn_scaling as optional key to yarn",
+    "updated_at": "2026-03-16T12:48:08Z"
   },
   {
-    "additions": 17,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - 2 model tester classes didn't follow the usual way we do things, which cause the tiny model creation script to fail with those model classes. - (the script initializes instances of model testers, in order to call\u2026",
-    "changed_files": 3,
+    "additions": 202,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixed Issue #44737: XLNet relative_positional_encoding function missing device parameter in torch.arange calls.",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44666",
-    "created_at": "2026-03-13T14:24:50Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44745",
+    "created_at": "2026-03-16T09:39:30Z",
+    "deletions": 33,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44666/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44666",
-    "labels": [],
-    "merged": true,
-    "number": 44666,
+    "files_url": "https://github.com/huggingface/transformers/pull/44745/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44745",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44745,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Another (small) set of fixes required for tiny model creation",
-    "updated_at": "2026-03-13T17:20:52Z"
+    "title": "fix: add device parameter to torch.arange calls in XLNet",
+    "updated_at": "2026-03-18T13:16:43Z"
   },
   {
-    "additions": 1,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "Dinov3 vit was refactored to introduce a module between top level and layers to have the capture decorators work as intended. Otherwise, it would force the backbone to do manual collection. This introduced a small conversion which is now a\u2026",
-    "changed_files": 1,
+    "additions": 35,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44665",
-    "created_at": "2026-03-13T13:50:26Z",
-    "deletions": 0,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44739",
+    "created_at": "2026-03-16T07:15:33Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44665/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44665",
+    "files_url": "https://github.com/huggingface/transformers/pull/44739/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44739",
     "labels": [],
     "merged": true,
-    "number": 44665,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[`Chmv2`] Fix conversion after capture refactor",
-    "updated_at": "2026-03-13T14:28:33Z"
+    "number": 44739,
+    "review_comments_count": 12,
+    "state": "closed",
+    "title": "fix series of failed test case for janus model",
+    "updated_at": "2026-04-01T08:24:26Z"
   },
   {
-    "additions": 18,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/44625 and https://github.com/huggingface/transformers/pull/44406#issuecomment-4203794413 We can create a clf model with LLM or VLM backbone like: ```python from transformers import A\u2026",
-    "changed_files": 8,
+    "additions": 6,
+    "author": "yunhaoli24",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #44031 ## The Problem The condition for calling `_patch_mistral_regex` was too broad (`vocab_size > 100000`), causing non-Mistral models like Qwen, LLaMA, BGE-Reranker to show incorrect regex pattern warnings\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44664",
-    "created_at": "2026-03-13T13:39:52Z",
-    "deletions": 193,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44736",
+    "created_at": "2026-03-16T06:00:47Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44664/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44664",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44736/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44736",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44664,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Generic Sequence Classifier works for multimodal models",
-    "updated_at": "2026-04-10T10:36:04Z"
+    "number": 44736,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: resolve false-positive regex warning for non-mistral models",
+    "updated_at": "2026-03-18T15:08:59Z"
   },
   {
     "additions": 1,
-    "author": "Ker102",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44661 \u2014 `transformers add-new-model-like` crashes with `AttributeError: 'ModelInfos' object has no attribute 'tokenizer_class'` when selecting a model that is in `TOKENIZER_MAPPING_NAMES`. ## Root Cause PR #40936 refactor\u2026",
+    "author": "mango766",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a crash in `transformers serve` when the `/v1/responses` streaming endpoint attempts to reuse a KV cache from a previous request in the same conversation session. ### The bug In `generate_response`, `inputs`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44663",
-    "created_at": "2026-03-13T13:25:48Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44735",
+    "created_at": "2026-03-16T04:09:32Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44663/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44663",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44663,
+    "files_url": "https://github.com/huggingface/transformers/pull/44735/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44735",
+    "labels": [],
+    "merged": true,
+    "number": 44735,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: restore missing `tokenizer_class` attribute in `ModelInfos.__init__`",
-    "updated_at": "2026-03-13T14:02:00Z"
+    "title": "Fix tensor indexing crash in serve generate_response KV cache continuation",
+    "updated_at": "2026-03-16T15:27:59Z"
   },
   {
-    "additions": 7084,
-    "author": "CyrilSterling",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR supports PenguinVL model. Paper: https://arxiv.org/abs/2603.06569 Github repo: https://github.com/tencent-ailab/Penguin-VL HuggingFace Model: https://huggingface.co/collections/tencent/ai-lab ## Before submi\u2026",
-    "changed_files": 20,
+    "additions": 28,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@IlyasMoutawwakil, pls help review, thx!",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44662",
-    "created_at": "2026-03-13T13:02:26Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44733",
+    "created_at": "2026-03-16T02:55:54Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44662/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44662",
+    "files_url": "https://github.com/huggingface/transformers/pull/44733/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44733",
     "labels": [],
-    "merged": false,
-    "number": 44662,
-    "review_comments_count": 95,
-    "state": "open",
-    "title": "[model] Add PenguinVL implementation",
-    "updated_at": "2026-04-01T10:53:25Z"
+    "merged": true,
+    "number": 44733,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix bug and add XPU Expectations for qwen2 and jamba tests",
+    "updated_at": "2026-04-01T08:24:40Z"
   },
   {
-    "additions": 5,
-    "author": "DogWala",
+    "additions": 1,
+    "author": "Defalt-Meh",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44637 This PR makes the PEFT `load_best_model_at_end` path in `Trainer` use a CPU-first adapter reload path during best-model loading. Previously, when training a PEFT model, `Trainer` could reload the best a\u2026",
+    "body_excerpt": "# What does this PR do? While running SmolVLM tests I noticed this warning in the output: ``` tests/test_video_processing_common.py:57: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider convert\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44660",
-    "created_at": "2026-03-13T12:59:25Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44731",
+    "created_at": "2026-03-15T23:26:31Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44660/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44660",
+    "files_url": "https://github.com/huggingface/transformers/pull/44731/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44731",
     "labels": [],
     "merged": false,
-    "number": 44660,
+    "number": 44731,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix: avoid late CUDA OOM in load_best_model_at_end with PEFT models",
-    "updated_at": "2026-03-13T13:18:52Z"
+    "title": "[Tests] Fix slow video tensor creation from list of numpy arrays in SmolVLM",
+    "updated_at": "2026-03-15T23:26:31Z"
   },
   {
-    "additions": 0,
-    "author": "Olexandr88",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Removes outdated use_diff entry from the docstring. The parameter is not present in the method signature or implementation.",
-    "changed_files": 1,
+    "additions": 9,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? It's unclear why the config class has `model_type = \"mlcd_vision_model\"` but the model on the hub has \"model_type\": \"mlcd\". This leads to the following failures (load from hub --> save locally --> local locally) ```\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44659",
-    "created_at": "2026-03-13T11:08:13Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44730",
+    "created_at": "2026-03-15T20:44:32Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44659/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44659",
+    "files_url": "https://github.com/huggingface/transformers/pull/44730/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44730",
     "labels": [],
-    "merged": false,
-    "number": 44659,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "docs: remove outdated use_diff docstring from DistributedConfig.to_js\u2026",
-    "updated_at": "2026-03-13T13:42:29Z"
+    "merged": true,
+    "number": 44730,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "Fix `mlcd` auto config/model/mapping issues",
+    "updated_at": "2026-03-16T12:12:30Z"
   },
   {
-    "additions": 18,
-    "author": "Charly21r",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes an issue where the `.modelcard` attribute of a pipeline is not initialized. Without this fix, calling `save_pretrained` on a pipeline (e.g., ASR pipeline) raises an `AttributeError` because `.modelcard` does n\u2026",
-    "changed_files": 2,
+    "additions": 214,
+    "author": "xenova",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR introduces a helper utility function, `int_div_ceil`, which performs `math.ceil(a / b)` for non-negative integer operands. This is necessary as the current approach is both error-prone and imprecise (especia\u2026",
+    "changed_files": 58,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44658",
-    "created_at": "2026-03-13T10:36:22Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44729",
+    "created_at": "2026-03-15T20:29:38Z",
+    "deletions": 225,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44658/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44658",
+    "files_url": "https://github.com/huggingface/transformers/pull/44729/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44729",
     "labels": [],
     "merged": false,
-    "number": 44658,
+    "number": 44729,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix: fix Pipeline's save_pretrained method (issue #44655)",
-    "updated_at": "2026-03-13T14:08:27Z"
+    "state": "open",
+    "title": "Avoid floating point math for ceil operations",
+    "updated_at": "2026-03-15T20:49:34Z"
   },
   {
-    "additions": 1,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ArthurZucker @Cyrilvallez pls help review. Thx!",
-    "changed_files": 1,
+    "additions": 88,
+    "author": "ajmeese7",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes a GPU memory leak in `Bnb4bitQuantize.convert()` where float16 source tensors are never freed during 4-bit quantized model loading via `from_pretrained`, causing OOM on models whose float16 size exceeds GPU VR\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44657",
-    "created_at": "2026-03-13T10:25:07Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44728",
+    "created_at": "2026-03-15T19:56:44Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44657/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44657",
+    "files_url": "https://github.com/huggingface/transformers/pull/44728/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44728",
     "labels": [],
-    "merged": true,
-    "number": 44657,
+    "merged": false,
+    "number": 44728,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix bug embedding_size mismatch with hidden_size in electra model test",
-    "updated_at": "2026-04-01T08:24:54Z"
+    "title": "Fix float16 memory leak during 4-bit quantized model loading",
+    "updated_at": "2026-03-16T20:53:54Z"
   },
   {
-    "additions": 80,
-    "author": "juliendenize",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR add support to mistral-common v1.10.0. This involves: - reasoning effort feature - Python 3.14 Also add `image_sizes` in `apply_chat_template` output to match what is expected by standard processor.",
-    "changed_files": 4,
+    "additions": 202,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixed issue where kwargs like force_download, proxies, token were not being passed to cached_file function.",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44656",
-    "created_at": "2026-03-13T10:24:11Z",
-    "deletions": 15,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44727",
+    "created_at": "2026-03-15T19:41:24Z",
+    "deletions": 33,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44656/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44656",
-    "labels": [],
-    "merged": true,
-    "number": 44656,
-    "review_comments_count": 1,
+    "files_url": "https://github.com/huggingface/transformers/pull/44727/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44727",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44727,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[MistralCommonBackend] Upgrade mistral-common to v1.10.0",
-    "updated_at": "2026-03-16T12:46:29Z"
+    "title": "fix: AutoProcessor.from_pretrained not passing kwargs to cached_file",
+    "updated_at": "2026-03-18T13:15:46Z"
   },
   {
-    "additions": 13,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR fixes the AWQ test suite to align with the GPTQModel migration (following #41567 and #42776). ### Changes - **Fix `replace_with_awq_linear` return value**: The function now returns the model directly instead of a tuple `(model, _)`\u2026",
-    "changed_files": 1,
+    "additions": 198,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Replaced bare except clause with except Exception in _safe_convert_tensor function to follow Python best practices (PEP 8).",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44654",
-    "created_at": "2026-03-13T07:31:19Z",
-    "deletions": 13,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44725",
+    "created_at": "2026-03-15T17:41:18Z",
+    "deletions": 29,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44654/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44654",
-    "labels": [],
-    "merged": true,
-    "number": 44654,
+    "files_url": "https://github.com/huggingface/transformers/pull/44725/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44725",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44725,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AWQ tests for GPTQModel migration",
-    "updated_at": "2026-03-13T16:28:14Z"
+    "title": "fix: replace bare except with Exception in Fuyu image processing",
+    "updated_at": "2026-03-18T13:16:22Z"
   },
   {
-    "additions": 18,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@zucchini-nlp, can you help review? Thx! unit tests to reproduce this bug: `tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py::Phi4MultimodalIntegrationTest::test_audio_text_generation`",
-    "changed_files": 1,
+    "additions": 6,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? TO be explained.",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44653",
-    "created_at": "2026-03-13T07:14:25Z",
-    "deletions": 9,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44653/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44653",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44724",
+    "created_at": "2026-03-15T17:14:12Z",
+    "deletions": 5,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44724/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44724",
     "labels": [],
     "merged": false,
-    "number": 44653,
-    "review_comments_count": 7,
-    "state": "closed",
-    "title": "Fix `AutoImageProcessor` to correctly detect local implementation whe\u2026",
-    "updated_at": "2026-04-09T02:32:53Z"
+    "number": 44724,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Fix some missing / incorrect entries in auto files",
+    "updated_at": "2026-03-16T09:59:56Z"
   },
   {
-    "additions": 8,
-    "author": "gambletan",
+    "additions": 12,
+    "author": "aashirpersonal",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44623 `processor.save_pretrained()` in v5 only saves the unified `processor_config.json` with nested sub-processor configs, but does not save standalone config files like `preprocessor_config.json` for the image processor\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## Summary This PR fixes #44716 by exposing and forwarding `interpolate_pos_encoding` through the Pixio embedding/model call chain so the option is actually usable from `PixioModel.forward()`. ### Changes - Added `interpolate_pos_encoding:\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44652",
-    "created_at": "2026-03-13T05:38:10Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44723",
+    "created_at": "2026-03-15T16:52:03Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44652/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44652",
+    "files_url": "https://github.com/huggingface/transformers/pull/44723/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44723",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44652,
+    "number": 44723,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix processor.save_pretrained missing sub-processor config files",
-    "updated_at": "2026-03-13T12:03:37Z"
+    "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel",
+    "updated_at": "2026-03-18T15:05:52Z"
   },
   {
-    "additions": 10,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44625 When passing `num_labels` to `AutoConfig.from_pretrained` for Qwen3.5, the value is set on the outer `Qwen3_5Config` but not propagated to `text_config`. This causes `AutoModelForSequenceClassification` to use the d\u2026",
+    "additions": 38,
+    "author": "chandan11248",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Migrates the GPT-J model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as described in #43979. ### Changes - Added `_can_record_outputs` to `GPTJPreTrained\u2026",
     "changed_files": 2,
-    "cluster_id": "cluster-44625-9",
+    "cluster_id": "cluster-43979-24",
     "cluster_ids": [
-      "cluster-44625-9"
+      "cluster-43979-24"
     ],
-    "cluster_role": "canonical",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44651",
-    "created_at": "2026-03-13T05:35:29Z",
-    "deletions": 0,
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44722",
+    "created_at": "2026-03-15T15:33:25Z",
+    "deletions": 110,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44651/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44651",
+    "files_url": "https://github.com/huggingface/transformers/pull/44722/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44722",
     "labels": [],
     "merged": false,
-    "number": 44651,
+    "number": 44722,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix Qwen3.5 num_labels not propagated to text_config",
-    "updated_at": "2026-03-13T13:39:43Z"
+    "state": "open",
+    "title": "Refactor gptj output tracing to use standardized decorators",
+    "updated_at": "2026-03-19T18:12:59Z"
   },
   {
-    "additions": 188,
-    "author": "shaealh",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Closes #44593 ## Summary - use generation_input_ids/generation_attention_mask when provided for decoder-only models - otherwise infer prompt from leading -100 labels and build left-padded prompt batch - return completion tokens for decoder\u2026",
-    "changed_files": 2,
+    "additions": 4,
+    "author": "rsmed31",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44716 `PixioPatchEmbeddings.forward` already accepted `interpolate_pos_encoding` but it was silently dropped \u2014 never passed from `PixioEmbeddings.forward` or `PixioModel.forward`, making the parameter effectively unusable\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44650",
-    "created_at": "2026-03-13T05:33:59Z",
-    "deletions": 6,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44718",
+    "created_at": "2026-03-14T23:57:14Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44650/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44650",
+    "files_url": "https://github.com/huggingface/transformers/pull/44718/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44718",
     "labels": [],
     "merged": false,
-    "number": 44650,
+    "number": 44718,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix Seq2SeqTrainer generation path for decoder-only models",
-    "updated_at": "2026-04-02T21:23:53Z"
+    "state": "closed",
+    "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel",
+    "updated_at": "2026-03-15T17:58:58Z"
   },
   {
-    "additions": 12,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44623 In v5.3.0, `ProcessorMixin.save_pretrained()` no longer calls `save_pretrained()` on non-tokenizer components (e.g. `image_processor`, `feature_extractor`). This means their individual config files (e.g. `preprocess\u2026",
-    "changed_files": 1,
+    "additions": 15,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As discussed internally, some component model classes didn't specify the correct config classes. This PR fixes them (those I could found - because the tiny model creation script fails due to those mistakes).",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44649",
-    "created_at": "2026-03-13T05:22:42Z",
-    "deletions": 207,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44715",
+    "created_at": "2026-03-14T21:11:52Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44649/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44649",
+    "files_url": "https://github.com/huggingface/transformers/pull/44715/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44715",
     "labels": [],
-    "merged": false,
-    "number": 44649,
+    "merged": true,
+    "number": 44715,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: ensure processor.save_pretrained saves all component files",
-    "updated_at": "2026-03-13T05:36:54Z"
+    "title": "Fix missing / incorrect `config` class in some model class definitions",
+    "updated_at": "2026-03-15T11:19:51Z"
   },
   {
-    "additions": 0,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44625 When `num_labels` or `id2label` are passed to `Qwen3_5Config` (e.g. via `AutoConfig.from_pretrained(model_name, num_labels=1)`), they are only set on the outer composite config but **not forwarded to `text_config`**\u2026",
-    "changed_files": 0,
+    "additions": 181,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating from core config to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but `text_config` still has default\u2026",
+    "changed_files": 8,
     "cluster_id": "cluster-44625-9",
     "cluster_ids": [
       "cluster-44625-9"
     ],
     "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44648",
-    "created_at": "2026-03-13T05:22:03Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44714",
+    "created_at": "2026-03-14T20:42:46Z",
+    "deletions": 26,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44648/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44648",
+    "files_url": "https://github.com/huggingface/transformers/pull/44714/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44714",
     "labels": [],
     "merged": false,
-    "number": 44648,
+    "number": 44714,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: propagate num_labels to text_config in Qwen3.5",
-    "updated_at": "2026-03-13T05:33:26Z"
+    "title": "fix: propagate num_labels to text_config for Qwen models",
+    "updated_at": "2026-03-18T12:56:27Z"
   },
   {
-    "additions": 9,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@remi-or @ArthurZucker @McPatate pls help review, thx!",
+    "additions": 15,
+    "author": "kulkarni-rohan",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Applies the output tracing refactor to ColQwen2ForRetrieval as part of the broader effort tracked in issue #43979 to modernize output handling across all models in the library. Changes in both modular_colqwen2.py and modeling_colqwen2.py:\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44713",
+    "created_at": "2026-03-14T20:20:14Z",
+    "deletions": 28,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44713/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44713",
+    "labels": [],
+    "merged": false,
+    "number": 44713,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[ColQwen2] Refactor output tracing (issue #43979)",
+    "updated_at": "2026-03-14T20:21:24Z"
+  },
+  {
+    "additions": 2,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? torch 2.11 is going to be released soon, but we still use 2.9. Let's update it to 2.10 so at least a run with torch 2.10, before we update to torch 2.11 later.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44712",
+    "created_at": "2026-03-14T20:18:01Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44712/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44712",
+    "labels": [],
+    "merged": true,
+    "number": 44712,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update Nvidia CI docker file to use torch 2.10",
+    "updated_at": "2026-03-14T20:29:04Z"
+  },
+  {
+    "additions": 339,
+    "author": "anuq",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #35141. When `tie_word_embeddings=False`, calling `resize_token_embeddings()` creates a new `nn.Linear` for the LM head via `_get_resized_lm_head()`. The new module's weight and bias tensors do **not** carry\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44647",
-    "created_at": "2026-03-13T04:55:26Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44711",
+    "created_at": "2026-03-14T19:21:21Z",
+    "deletions": 205,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44647/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44647",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44711/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44711",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44647,
+    "number": 44711,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add more generic device support for continuous batching",
-    "updated_at": "2026-04-09T02:32:51Z"
+    "title": "fix: mark new lm_head params as `_is_hf_initialized` after `resize_token_embeddings`",
+    "updated_at": "2026-03-20T13:36:58Z"
   },
   {
-    "additions": 4,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed typo in Qwen3.5 and Qwen3-VL-MoE model files ```diff - Since Qwen3.5 use timestamps to seperate videos + Since Qwen3.5 use timestamps to separate videos ```",
-    "changed_files": 2,
+    "additions": 12,
+    "author": "he-yufeng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes `AutoProcessor.from_pretrained` silently dropping hub kwargs like `force_download`, `cache_dir`, `token`, `revision`, etc. ### The bug The existing code on line ~300 filters kwargs using `inspect.signature(ca\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44646",
-    "created_at": "2026-03-13T04:48:06Z",
-    "deletions": 4,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44710",
+    "created_at": "2026-03-14T18:33:53Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44646/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44646",
+    "files_url": "https://github.com/huggingface/transformers/pull/44710/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44710",
     "labels": [],
-    "merged": false,
-    "number": 44646,
+    "merged": true,
+    "number": 44710,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix typo: seperate -> separate",
-    "updated_at": "2026-03-13T15:27:08Z"
+    "state": "closed",
+    "title": "Fix AutoProcessor.from_pretrained silently dropping hub kwargs",
+    "updated_at": "2026-03-25T18:13:14Z"
   },
   {
-    "additions": 4,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "- Intel XPU: @IlyasMoutawwakil ``` ======================================================================== FAILURES ======================================================================== _________________________________________________\u2026",
-    "changed_files": 3,
+    "additions": 6778,
+    "author": "LucasMa2025",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# \ud83c\udf9b\ufe0f Add Configurable Generation Scheduler and State Machine for `generate()` ## Summary This PR introduces a **fully optional, zero-intrusion** Generation Scheduler (`GenerationScheduler`) and explicit state machine (`GenerationStateMachi\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44645",
-    "created_at": "2026-03-13T02:53:31Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44645/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44645",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44708",
+    "created_at": "2026-03-14T17:13:34Z",
+    "deletions": 7,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44708/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44708",
     "labels": [],
-    "merged": true,
-    "number": 44645,
+    "merged": false,
+    "number": 44708,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix Image.open failure in case \"tests/models/prompt_depth_anything/te\u2026",
-    "updated_at": "2026-03-27T11:11:05Z"
+    "title": "Add Configurable Generation Scheduler and State Machine for `generate()`",
+    "updated_at": "2026-03-14T19:19:11Z"
   },
   {
-    "additions": 2,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026MXFP4PseudoquantTest::test_quantized_model fail in xpu ## Who can review? - quantization: @SunMarc - Intel XPU: @IlyasMoutawwakil",
+    "additions": 3,
+    "author": "saivedant169",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `MptForCausalLM.forward()` and `MptModel.forward()`, bringing MPT in line with other CausalLM models. Same rationale as the Bloom PR (#44706) \u2014 M\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44644",
-    "created_at": "2026-03-13T02:02:45Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44707",
+    "created_at": "2026-03-14T17:12:16Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44644/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44644",
-    "labels": [],
-    "merged": true,
-    "number": 44644,
-    "review_comments_count": 6,
+    "files_url": "https://github.com/huggingface/transformers/pull/44707/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44707",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44707,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix tests/quantization/fp_quant_integration/test_fp_quant.py::FPQuant\u2026",
-    "updated_at": "2026-03-27T14:14:13Z"
+    "title": "Add position_ids to MptForCausalLM forward pass",
+    "updated_at": "2026-03-18T13:39:36Z"
   },
   {
-    "additions": 5,
-    "author": "joshuaswanson",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "The generic `labels` docstring in `ModelArgs` says \"masked language modeling loss\" and doesn't mention that causal LM models shift labels internally. This has tripped up a lot of users who pre-shift their labels and end up training next-ne\u2026",
+    "additions": 3,
+    "author": "saivedant169",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `BloomForCausalLM.forward()` and `BloomModel.forward()`, bringing Bloom in line with other CausalLM models like Llama, Falcon, Gemma, and Mistral\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44642",
-    "created_at": "2026-03-12T23:47:11Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44706",
+    "created_at": "2026-03-14T17:09:11Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44642/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44642",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44706/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44706",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44642,
+    "number": 44706,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Clarify that causal LM labels are shifted internally",
-    "updated_at": "2026-03-13T00:02:30Z"
+    "state": "closed",
+    "title": "Add position_ids to BloomForCausalLM forward pass",
+    "updated_at": "2026-03-18T13:39:51Z"
   },
   {
-    "additions": 1,
-    "author": "kmbhattt-aws",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Issue: A full 4D attention mask of shape `[1, 1, seq_len, seq_len]` is being created during attention, even when not using alibi for positional embeddings. - This occupied extra memory during training. Root Cause: T\u2026",
+    "additions": 14,
+    "author": "saivedant169",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes part of #32937 ## What does this PR do? RoFormer introduced rotary position embeddings, but its `ForCausalLM` forward method doesn't accept `position_ids` \u2014 which means callers can't specify custom positions for packed sequences or f\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44641",
-    "created_at": "2026-03-12T23:01:11Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44705",
+    "created_at": "2026-03-14T16:48:06Z",
     "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44641/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44641",
-    "labels": [],
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44705/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44705",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44641,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Conditinally passing and_mask_function arg to create_causal_mask ",
-    "updated_at": "2026-03-13T02:09:22Z"
+    "number": 44705,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add position_ids to RoFormerForCausalLM forward pass",
+    "updated_at": "2026-03-18T13:40:05Z"
   },
   {
-    "additions": 11,
-    "author": "michaelbenayoun",
+    "additions": 26,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - Add `\"embedding_colwise\"` tp plan - Add register methods for `ParallelInterface`. Without it, we can register plans with the register method, but not the corresponding `plan_to_weight_dim` and `plan_to_bias_dim`.",
-    "changed_files": 1,
+    "body_excerpt": "As per title, it seems that the `cute` subfolder can be even distributed if you only install FA2 which implies something wrong. Now we check under the (normalized) distribution names",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44640",
-    "created_at": "2026-03-12T20:14:06Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44703",
+    "created_at": "2026-03-14T14:46:02Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44640/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44640",
+    "files_url": "https://github.com/huggingface/transformers/pull/44703/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44703",
     "labels": [],
     "merged": true,
-    "number": 44640,
-    "review_comments_count": 0,
+    "number": 44703,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add register method for `ParallelInterface`",
-    "updated_at": "2026-03-13T18:57:48Z"
+    "title": "[`FA`] Fix fa detection",
+    "updated_at": "2026-03-14T17:19:07Z"
   },
   {
-    "additions": 24099,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary Fixes bugs introduced during the `__init__` \u2192 `@dataclass` conversion in #41250. All are incorrect default values caught by automated comparison of `__init__` signatures (main) vs dataclass fields (PR). | Model | Param | Was | S\u2026",
-    "changed_files": 931,
+    "additions": 148,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR fix? The `rms_norm_eps` parameter in `MistralConfig` was incorrectly typed as `int | None` but defaults to `1e-6` which is a float. This parameter is passed to `MistralRMSNorm` which expects `eps: float`. ### Bug Detai\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44639",
-    "created_at": "2026-03-12T16:49:54Z",
-    "deletions": 38773,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44702",
+    "created_at": "2026-03-14T14:41:15Z",
+    "deletions": 25,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44639/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44639",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44702/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44702",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44639,
+    "number": 44702,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix incorrect default values in config dataclass migration",
-    "updated_at": "2026-03-12T16:50:10Z"
+    "title": "fix: Correct rms_norm_eps type hint from int to float in MistralConfig",
+    "updated_at": "2026-03-18T13:00:12Z"
   },
   {
-    "additions": 19,
-    "author": "Cyrilvallez",
+    "additions": 219,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/44614. This one is the result of a long debugging session and discussion with @vasqu. The issue is as follow: - Backbone ALWAYS need to c\u2026",
-    "changed_files": 3,
+    "body_excerpt": "These models have `base_model_pp_plan`s but currently do not work because the base model's forward pass depends on all the `layers` being `Qwen2VLDecoderLayer`. i.e. if one of the layers is removed/replaced with `Identity`, `decoder_layer.\u2026",
+    "changed_files": 52,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44638",
-    "created_at": "2026-03-12T16:19:49Z",
-    "deletions": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44699",
+    "created_at": "2026-03-14T11:44:24Z",
+    "deletions": 148,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44638/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44638",
+    "files_url": "https://github.com/huggingface/transformers/pull/44699/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44699",
     "labels": [],
     "merged": true,
-    "number": 44638,
-    "review_comments_count": 2,
+    "number": 44699,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix output capturing for Backbones",
-    "updated_at": "2026-03-12T17:11:32Z"
+    "title": "Fix several based models' pipeline parallel support",
+    "updated_at": "2026-03-20T13:53:27Z"
   },
   {
-    "additions": 571,
-    "author": "tarekziade",
+    "additions": 1,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - Adds an `httpx` tracer to gather metrics about network calls - Collect and store metrics and generates an artifact in CI - Can be used locally with `DEBUG_NETWORK` - Activated in CircleCI example of local run: ```\u2026",
-    "changed_files": 4,
+    "body_excerpt": "The typo in the `elif` chain meant that `image` and `video` modalidty encoders could not be set using this method. This PR fixes the typo so that they can.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44636",
-    "created_at": "2026-03-12T15:25:10Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44698",
+    "created_at": "2026-03-14T11:18:54Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44636/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44636",
+    "files_url": "https://github.com/huggingface/transformers/pull/44698/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44698",
     "labels": [],
     "merged": true,
-    "number": 44636,
-    "review_comments_count": 5,
+    "number": 44698,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "feat(ci): added a network debug report",
-    "updated_at": "2026-03-18T19:19:03Z"
+    "title": "Fix `set_encoder`",
+    "updated_at": "2026-03-14T13:42:00Z"
   },
   {
-    "additions": 111,
-    "author": "RyanMullins",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Per a conversion with @Cyrilvallez on Slack on how to make Gemma models more compatible with modular inheritance, this PR: * Updates Gemma models to use `nn.parameter.Buffer` instead of a `self.register_buffer()` fo\u2026",
-    "changed_files": 32,
+    "additions": 75,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description The `torch_float` function in `src/transformers/utils/generic.py` was incorrectly returning `int(x)` in two places where it should return `float(x)`: 1. When torch is not available (fallback case) 2. When not in a tracing co\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44635",
-    "created_at": "2026-03-12T14:47:46Z",
-    "deletions": 87,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44697",
+    "created_at": "2026-03-14T10:44:12Z",
+    "deletions": 25,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44635/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44635",
+    "files_url": "https://github.com/huggingface/transformers/pull/44697/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44697",
     "labels": [],
     "merged": false,
-    "number": 44635,
-    "review_comments_count": 0,
+    "number": 44697,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "[Gemma] Modular-friendly buffers",
-    "updated_at": "2026-03-18T10:44:25Z"
+    "title": "fix: torch_float should return float, not int",
+    "updated_at": "2026-03-17T19:29:02Z"
   },
   {
-    "additions": 30,
-    "author": "Cyrilvallez",
+    "additions": 19,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title",
-    "changed_files": 3,
+    "body_excerpt": "In configs, `base_model_pp_plan` and `base_model_tp_plan` default to `None` In models, `_pp_plan` and `_tp_plan` _look like_ they default to `None` based on the class variables, but will actually always be a dict because of `post_init`. Th\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44634",
-    "created_at": "2026-03-12T14:04:36Z",
-    "deletions": 6,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44696",
+    "created_at": "2026-03-14T09:41:07Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44634/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44634",
+    "files_url": "https://github.com/huggingface/transformers/pull/44696/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44696",
     "labels": [],
     "merged": true,
-    "number": 44634,
-    "review_comments_count": 1,
+    "number": 44696,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Fix lfm2 kernel path",
-    "updated_at": "2026-03-12T15:00:59Z"
+    "title": "Fix `supports_{tp/pp}_plan`",
+    "updated_at": "2026-03-31T13:12:56Z"
   },
   {
-    "additions": 26,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title \ud83e\udd17",
-    "changed_files": 1,
+    "additions": 4,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Kyutai Speech-To-Text**: [The PR [processors] Unbloating simple processors](https://github.com/huggingface/transformers/pull/40377), [refactore\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44633",
-    "created_at": "2026-03-12T13:35:48Z",
-    "deletions": 11,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44695",
+    "created_at": "2026-03-14T09:05:35Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44633/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44633",
+    "files_url": "https://github.com/huggingface/transformers/pull/44695/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44695",
     "labels": [],
     "merged": true,
-    "number": 44633,
-    "review_comments_count": 2,
+    "number": 44695,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "[medasr] doc update",
-    "updated_at": "2026-03-16T09:39:50Z"
+    "title": "fix(testing): Fix Kyutai Speech-To-Text and LongCatFlash test failures on main CI",
+    "updated_at": "2026-04-09T15:41:05Z"
   },
   {
-    "additions": 35,
-    "author": "Abdennacer-Badaoui",
-    "author_association": "MEMBER",
-    "body_excerpt": "Summary - Update test expectations for affected models - Add some needed dependencies - Fix TypeError: `GenerationMixin.prepare_inputs_for_generation()` got multiple values for argument 'next_sequence_length' in Qwen2.5-Omni talker by pass\u2026",
-    "changed_files": 6,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44632",
-    "created_at": "2026-03-12T13:32:33Z",
-    "deletions": 22,
+    "additions": 143,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagated from core config to text config. When loading `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the outer config gets `num_labels=1` but the inner `text_config` still ha\u2026",
+    "changed_files": 7,
+    "cluster_id": "cluster-44625-9",
+    "cluster_ids": [
+      "cluster-44625-9"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44693",
+    "created_at": "2026-03-14T05:43:00Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44632/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44632",
+    "files_url": "https://github.com/huggingface/transformers/pull/44693/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44693",
     "labels": [],
-    "merged": true,
-    "number": 44632,
-    "review_comments_count": 6,
+    "merged": false,
+    "number": 44693,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[AMD CI] Fix test failures across important models ",
-    "updated_at": "2026-03-17T14:58:10Z"
+    "title": "fix: Propagate num_labels to text_config in Qwen3.5",
+    "updated_at": "2026-03-18T12:56:25Z"
   },
   {
-    "additions": 33,
-    "author": "RyanMullins",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Updates the weights conversion scripts for Gemma to: * Use the new `SentencePieceExtractor` class to get the vocab and merges from the SPM * Always initialize and save the unified `GemmaTokenizer` class ## Before su\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "additions": 18,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44514. `Qwen2_5_VLProcessor.apply_chat_template` crashes with `ValueError` when called with batched input and `padding=False` (the default). The root cause is `np.array(text_inputs[\"input_ids\"])` which fails when sequence\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44514-8",
+    "cluster_ids": [
+      "cluster-44514-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44692",
+    "created_at": "2026-03-14T04:14:38Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44692/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44692",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44692,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: handle ragged input_ids in Qwen2_5_VLProcessor.apply_chat_template",
+    "updated_at": "2026-03-18T12:44:18Z"
+  },
+  {
+    "additions": 23,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Fixes `num_labels` (and `id2label`/`label2id`) not being propagated from the outer `Qwen3_5Config` to its inner `text_config` when passed via `AutoConfig.from_pretrained(..., num_labels=1)`. - When `text_config` is `None` or a\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44625-9",
+    "cluster_ids": [
+      "cluster-44625-9"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44631",
-    "created_at": "2026-03-12T13:32:25Z",
-    "deletions": 45,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44691",
+    "created_at": "2026-03-14T04:10:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44631/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44631",
-    "labels": [],
-    "merged": true,
-    "number": 44631,
+    "files_url": "https://github.com/huggingface/transformers/pull/44691/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44691",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44691,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix Qwen3.5 num_labels not propagated to text_config",
+    "updated_at": "2026-03-18T12:57:19Z"
+  },
+  {
+    "additions": 6,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44360 The `GlmMoeDsaIndexer` is missing a ReLU activation on the per-head dot-product scores before the weighted sum across heads. The reference DeepSeek V3.2 implementation applies ReLU inside the `fp8_index` kernel: ```\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44360-6",
+    "cluster_ids": [
+      "cluster-44360-6"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44690",
+    "created_at": "2026-03-14T03:44:37Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44690/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44690",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44690,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Gemma] Update conversion scripts for Transformers v5 Comaptibility",
-    "updated_at": "2026-03-18T10:39:53Z"
+    "title": "Fix missing ReLU in GLM-MOE-DSA indexer scoring",
+    "updated_at": "2026-03-18T12:40:23Z"
   },
   {
-    "additions": 42,
-    "author": "MaybeSam05",
+    "additions": 141,
+    "author": "LincolnBurrows2017",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a bug where `num_labels` passed to `AutoConfig.from_pretrained` for Qwen3.5 did not propagate from the top\u2011level `Qwen3_5Config` into the `text_config`, so `AutoModelForSequenceClassification` still saw the de\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but text_config still has default `num_labels=2`. Thi\u2026",
+    "changed_files": 6,
     "cluster_id": "cluster-44625-9",
     "cluster_ids": [
       "cluster-44625-9"
     ],
     "cluster_role": "member",
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44630",
-    "created_at": "2026-03-12T13:25:54Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44688",
+    "created_at": "2026-03-14T00:40:50Z",
+    "deletions": 23,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44630/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44630",
+    "files_url": "https://github.com/huggingface/transformers/pull/44688/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44688",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44630,
+    "number": 44688,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Qwen3.5 num_labels propagation to text_config (fix #44625)",
-    "updated_at": "2026-03-12T13:46:07Z"
-  },
-  {
-    "additions": 15,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes \"auto\" dtype when the model is initialized `from_config` It was already fixed for `from_pretrained` in https://github.com/huggingface/transformers/pull/42990 but vLLM creates models with `AutoModel._from_confi\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44629",
-    "created_at": "2026-03-12T13:07:55Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44629/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44629",
-    "labels": [],
-    "merged": true,
-    "number": 44629,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "Ensure same `dtype` for subconfig when `_from_config`",
-    "updated_at": "2026-03-13T11:35:10Z"
+    "title": "fix: Propagate num_labels to text_config in Qwen models",
+    "updated_at": "2026-03-18T12:56:41Z"
   },
   {
-    "additions": 37,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - `encoder_config` and `decoder_config` should return `None` for encoder / decoder config classes themselves. - The encoder / decoder model classes should have the correct config classes associated to them <!-- CURS\u2026",
-    "changed_files": 3,
+    "additions": 8,
+    "author": "vxa8502",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes partial #32937 Adds explicit `position_ids` threading through GPT-Neo's attention layers to enable flash attention's packed sequence optimization. ## Context GPT-Neo uses learned absolute position embeddings (`wpe`) applied at the mo\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44628",
-    "created_at": "2026-03-12T12:24:44Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44628/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44628",
-    "labels": [],
-    "merged": true,
-    "number": 44628,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Fix for `VibeVoiceAcousticTokenizer`",
-    "updated_at": "2026-03-12T13:33:02Z"
-  },
-  {
-    "additions": 141,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The diff in revert mapping is needed, otherwise we get failures in a few models, see https://app.circleci.com/pipelines/github/huggingface/transformers/167425/workflows/fa96efe5-f810-408e-bafd-de03b7e881aa/jobs/2208\u2026",
-    "changed_files": 78,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 16,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44627",
-    "created_at": "2026-03-12T12:00:31Z",
-    "deletions": 367,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44687",
+    "created_at": "2026-03-13T23:28:55Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44627/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44627",
-    "labels": [],
-    "merged": true,
-    "number": 44627,
-    "review_comments_count": 8,
+    "files_url": "https://github.com/huggingface/transformers/pull/44687/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44687",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44687,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Move VLM conversions to the main mapping",
-    "updated_at": "2026-03-17T10:13:03Z"
+    "title": "Add explicit position_ids to GPT-Neo attention layers",
+    "updated_at": "2026-03-18T13:06:49Z"
   },
   {
-    "additions": 11,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds a missing branch. I don't really know if this is worth it, can't find a model online that enforces the flag to `True`",
-    "changed_files": 1,
+    "additions": 615,
+    "author": "tejasae-afk",
+    "author_association": "NONE",
+    "body_excerpt": "During an automated code review of src/transformers/models/marian/convert_marian_to_pytorch.py, the following issue was identified. Use safe_load in convert marian to pytorch. yaml.load on untrusted input can construct arbitrary Python obj\u2026",
+    "changed_files": 80,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44626",
-    "created_at": "2026-03-12T11:23:21Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44686",
+    "created_at": "2026-03-13T21:22:07Z",
+    "deletions": 259,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44626/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44626",
+    "files_url": "https://github.com/huggingface/transformers/pull/44686/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44686",
     "labels": [],
     "merged": false,
-    "number": 44626,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "don't break legacy behavior when enforced!",
-    "updated_at": "2026-03-12T11:32:46Z"
-  },
-  {
-    "additions": 34,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Follow-up of #44549",
-    "changed_files": 16,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44624",
-    "created_at": "2026-03-12T09:26:17Z",
-    "deletions": 34,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44624/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44624",
-    "labels": [],
-    "merged": true,
-    "number": 44624,
+    "number": 44686,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix more wrong HF hub checkpoint names",
-    "updated_at": "2026-03-12T09:59:12Z"
+    "title": "Use safe_load in convert marian to pytorch",
+    "updated_at": "2026-03-14T03:54:31Z"
   },
   {
-    "additions": 17,
-    "author": "LysandreJik",
+    "additions": 10,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "CB temporarily disabled on non-cuda devices as it's currently using cuda streams by default.",
+    "body_excerpt": "# What does this PR do? For tiny model creation script - new added model test files still miss this argument ...",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44622",
-    "created_at": "2026-03-12T08:11:10Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44685",
+    "created_at": "2026-03-13T20:53:41Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44622/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44622",
+    "files_url": "https://github.com/huggingface/transformers/pull/44685/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44685",
     "labels": [],
-    "merged": false,
-    "number": 44622,
+    "merged": true,
+    "number": 44685,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "CB temporarily disabled on non-cuda devices",
-    "updated_at": "2026-03-16T00:00:20Z"
+    "title": "Fix more model tester missing `parent` issue",
+    "updated_at": "2026-03-13T21:03:46Z"
   },
   {
-    "additions": 0,
-    "author": "KoichiYasuoka",
+    "additions": 41,
+    "author": "ntenenz",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #43170 for `modelcard` removal Quick reproduce: ``` from transformers import pipeline fmp=pipeline(\"fill-mask\",\"google-bert/bert-base-cased\") fmp.save_pretrained(\"tmpdir\") ``` ## Before submitting - [ ] This P\u2026",
+    "body_excerpt": "\u2026 # What does this PR do? In torch versions >= 2.9.0, it requests the lse from flex_attenetion using `AuxRequest` instead of the deprecated `return_lse`, which triggers a warning and can break tracing. Fixes #44683 ## Before submitting - [\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44621",
-    "created_at": "2026-03-12T08:04:29Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44684",
+    "created_at": "2026-03-13T20:16:35Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44621/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44621",
+    "files_url": "https://github.com/huggingface/transformers/pull/44684/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44684",
     "labels": [],
     "merged": true,
-    "number": 44621,
-    "review_comments_count": 0,
+    "number": 44684,
+    "review_comments_count": 8,
     "state": "closed",
-    "title": "pipelines do not have modelcard",
-    "updated_at": "2026-03-13T14:28:48Z"
+    "title": "update flex attention to use `return_aux` instead of `return_lse` when torch verison >= 2.9",
+    "updated_at": "2026-03-18T11:44:18Z"
   },
   {
-    "additions": 15,
-    "author": "LysandreJik",
+    "additions": 301,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "FastAPI doesn't play well with `from __future__ import annotations`. This PR reverts this change and correctly guards against unprotected optional imports. Reverts https://github.com/huggingface/transformers/pull/44256",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Llama cpp integration in transformers serve. Minor changes to add llama.cpp integration Mostly changes on serve to fix latency for streaming and non streaming",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44620",
-    "created_at": "2026-03-12T07:56:55Z",
-    "deletions": 16,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44682",
+    "created_at": "2026-03-13T18:52:41Z",
+    "deletions": 73,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44620/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44620",
+    "files_url": "https://github.com/huggingface/transformers/pull/44682/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44682",
     "labels": [],
-    "merged": true,
-    "number": 44620,
+    "merged": false,
+    "number": 44682,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix transformers serve's 422 unprocessable entity",
-    "updated_at": "2026-03-16T13:41:44Z"
+    "state": "open",
+    "title": "transformers serve + llamacpp",
+    "updated_at": "2026-03-14T07:05:29Z"
   },
   {
-    "additions": 43,
-    "author": "yunhaoli24",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Set `add_bos_token=True` and `add_eos_token=True` by default in `DebertaV2Tokenizer` to fix the regression where `add_special_tokens=True` doesn't add BOS/EOS tokens for `microsoft/mdeberta-v3-base` tokenizer in transformers >=5\u2026",
+    "additions": 47,
+    "author": "dacorvo",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes #44679 ## Summary - Custom attention kernels registered via `load_and_register_attn_kernel` currently get hardcoded `flash_attention_2` mask dispatch, which produces 2D or `None` masks - Kernels that need SDPA-style 4D boolean masks\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44618",
-    "created_at": "2026-03-12T04:46:19Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44680",
+    "created_at": "2026-03-13T17:55:54Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44618/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44618",
+    "files_url": "https://github.com/huggingface/transformers/pull/44680/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44680",
     "labels": [],
     "merged": false,
-    "number": 44618,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: Add BOS/EOS tokens by default for DeBERTa v2 tokenizer",
-    "updated_at": "2026-03-16T05:28:25Z"
+    "number": 44680,
+    "review_comments_count": 12,
+    "state": "open",
+    "title": "Allow kernel modules to declare their preferred mask function",
+    "updated_at": "2026-04-14T13:12:25Z"
   },
   {
-    "additions": 7,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Add fallback to bfloat16 when Float8 dtype fails to set, preventing TypeError when loading FP8 models on PyTorch builds without Float8_e4m3fnStorage support. ## Root Cause `torch.set_default_dtype(dtype)` raises `TypeError: coul\u2026",
+    "additions": 9,
+    "author": "JokeYoonic",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Problem: - On macOS ARM64 + Python 3.13 + transformers 5.x, GPT-2 model's lm_head forward pass produces NaN/Inf values during inference - Root cause: lm_head.weight is tied to transformer.wte.weight, and the shared memory reference causes\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44616",
-    "created_at": "2026-03-11T23:00:15Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44676",
+    "created_at": "2026-03-13T16:28:01Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44616/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44616",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44676/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44676",
+    "labels": [],
     "merged": false,
-    "number": 44616,
+    "number": 44676,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: add Float8 dtype fallback in modeling_utils.py",
-    "updated_at": "2026-03-18T16:02:54Z"
+    "state": "open",
+    "title": "fix(gpt2): Resolve NaN/Inf issue in lm_head on Python 3.13 with tied weights",
+    "updated_at": "2026-03-18T17:16:49Z"
   },
   {
-    "additions": 35,
-    "author": "MaybeSam05",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Restores `is_torch_fx_available` in `transformers.utils.import_utils` as a backwards-compatibility shim so that Hub models loaded with `trust_remote_code=True` that still import this symbol no longer raise `ImportEr\u2026",
+    "additions": 32,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "properly formats the `ContinuousBatchingConfig` below: <img width=\"976\" height=\"626\" alt=\"Screenshot 2026-03-13 at 9 09 39 AM\" src=\"https://github.com/user-attachments/assets/4390c6f7-bb63-4039-a46e-9f4ae23f5d98\" />",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44615",
-    "created_at": "2026-03-11T22:52:23Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44675",
+    "created_at": "2026-03-13T16:10:28Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44615/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44615",
+    "files_url": "https://github.com/huggingface/transformers/pull/44675/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44675",
     "labels": [],
-    "merged": false,
-    "number": 44615,
+    "merged": true,
+    "number": 44675,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Restore is_torch_fx_available for trust_remote_code backwards compatibility",
-    "updated_at": "2026-03-12T10:33:43Z"
+    "state": "closed",
+    "title": "[docs] cb config",
+    "updated_at": "2026-03-13T23:15:04Z"
   },
   {
-    "additions": 19,
-    "author": "vasqu",
+    "additions": 408,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "Only detected in #43590, so it can only be detected there for `test_sdpa_can_compile_dynamic` (`lw_detr`). Core issue: Dynamo can cache the attribute and ignore it across frames which means that updates/reads are not working as expected. T\u2026",
-    "changed_files": 2,
+    "body_excerpt": "We've had `parse_response()` in the library for a while, but it's been a soft launch / prototype feature. This PR cleans it up and documents it, making it an official feature! The API is largely unchanged from the prototype, but we drop `x\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44614",
-    "created_at": "2026-03-11T20:49:51Z",
-    "deletions": 22,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44674",
+    "created_at": "2026-03-13T15:41:42Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44614/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44614",
+    "files_url": "https://github.com/huggingface/transformers/pull/44674/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44674",
     "labels": [],
-    "merged": false,
-    "number": 44614,
-    "review_comments_count": 10,
+    "merged": true,
+    "number": 44674,
+    "review_comments_count": 11,
     "state": "closed",
-    "title": "[`Compile`] Fix capture outputs during compile",
-    "updated_at": "2026-03-13T02:15:21Z"
+    "title": "Officially launch parse_response",
+    "updated_at": "2026-03-24T15:55:05Z"
   },
   {
-    "additions": 105,
-    "author": "stevhliu",
+    "additions": 73,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "adds docs for tensor parallelism for training",
+    "body_excerpt": "This PR fixes a bug in continuous batching where non-CUDA devices cannot use the feature because some CUDA-exclusive objects are always instantiated. It also adds a test to make sure this will not break again in the future.",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44613",
-    "created_at": "2026-03-11T20:43:53Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44673",
+    "created_at": "2026-03-13T15:37:01Z",
+    "deletions": 15,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44613/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44613",
+    "files_url": "https://github.com/huggingface/transformers/pull/44673/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44673",
     "labels": [],
     "merged": true,
-    "number": 44613,
+    "number": 44673,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] tp training",
-    "updated_at": "2026-04-09T22:20:47Z"
+    "title": "[CB] [Bug] Fix crashes when running without cuda",
+    "updated_at": "2026-03-15T23:59:55Z"
   },
   {
     "additions": 1,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The output function hook in `EmbeddingParallel` casts the mask to fp32. It breaks things for neuron devices. Suggested fix: cast to the outputs' dtype.",
+    "author": "neo",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44612",
-    "created_at": "2026-03-11T20:09:41Z",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44672",
+    "created_at": "2026-03-13T15:33:15Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44612/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44612",
+    "files_url": "https://github.com/huggingface/transformers/pull/44672/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44672",
     "labels": [],
     "merged": true,
-    "number": 44612,
-    "review_comments_count": 3,
+    "number": 44672,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: cast to proper dtype in EmbeddingParallel",
-    "updated_at": "2026-03-12T21:08:04Z"
+    "title": "Fix annotations reader for python 3.14 in `PreTrainedModel`",
+    "updated_at": "2026-03-19T13:30:48Z"
   },
   {
-    "additions": 15,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 [This PR (\ud83d\udea8 Delete duplicate code in backbone utils)](https://github.com/huggingface/transformers/pull/43323) structured config loading to use [BackboneMi\u2026",
-    "changed_files": 2,
+    "additions": 6,
+    "author": "dacorvo",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes #44678 ## Summary - Replace advanced indexing (`self.gate_up_proj[expert_ids]`) with explicit `torch.index_select(self.gate_up_proj, 0, expert_ids)` in `batched_mm_experts_forward` - 6 replacements total (3 weight tensors + 3 bias te\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44611",
-    "created_at": "2026-03-11T20:02:14Z",
-    "deletions": 3,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44669",
+    "created_at": "2026-03-13T14:52:22Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44611/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44611",
+    "files_url": "https://github.com/huggingface/transformers/pull/44669/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44669",
     "labels": [],
-    "merged": true,
-    "number": 44611,
-    "review_comments_count": 12,
+    "merged": false,
+    "number": 44669,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(models): Forward timm model kwargs to timm.create_model for OmDet-Turbo",
-    "updated_at": "2026-03-13T11:57:20Z"
+    "title": "Use `index_select` instead of advanced indexing in `batched_mm_experts_forward`",
+    "updated_at": "2026-03-19T13:39:23Z"
   },
   {
-    "additions": 0,
-    "author": "michaelbenayoun",
+    "additions": 18,
+    "author": "dacorvo",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? By default, `initialize_tensor_parallelism` hides stdout and stderr for ranks > 0. While convenient, this is not perfect for dev and debugging. I suggest we simply add a flag to be able to disable this feature if wa\u2026",
-    "changed_files": 1,
+    "body_excerpt": "Fixes #44677 ## Summary - Add `base_model_tp_plan` to `OlmoeConfig`, enabling `from_pretrained(tp_plan=\"auto\")` for OLMoE models - Add `TensorParallelTesterMixin` to OLMoE tests for TP validation coverage - Uses `\"colwise\"` for `q_norm` an\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44608",
-    "created_at": "2026-03-11T18:57:01Z",
-    "deletions": 7,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44668",
+    "created_at": "2026-03-13T14:45:22Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44608/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44608",
+    "files_url": "https://github.com/huggingface/transformers/pull/44668/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44668",
     "labels": [],
     "merged": true,
-    "number": 44608,
-    "review_comments_count": 2,
+    "number": 44668,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Allow to disable stdout hiding for TP",
-    "updated_at": "2026-03-12T19:36:06Z"
+    "title": "Add `base_model_tp_plan` to `OlmoeConfig`",
+    "updated_at": "2026-03-26T13:58:58Z"
   },
   {
-    "additions": 9,
-    "author": "gabe-l-hart",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR fixes generation for models using the `Idefics3ForConditionalGeneration` architecture with `use_cache=False`. ## Testing <details> <summary>docling_repro.py</summary> ```py import os import torch import time\u2026",
-    "changed_files": 3,
+    "additions": 412,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? A few find-and-replaces and cache position is deleted from the rest of models. Still have to check docs and test files, so WIP",
+    "changed_files": 120,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44607",
-    "created_at": "2026-03-11T18:41:58Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44667",
+    "created_at": "2026-03-13T14:37:26Z",
+    "deletions": 1519,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44667/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44667",
+    "labels": [],
+    "merged": false,
+    "number": 44667,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Goodbye cache position",
+    "updated_at": "2026-03-19T11:55:04Z"
+  },
+  {
+    "additions": 17,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - 2 model tester classes didn't follow the usual way we do things, which cause the tiny model creation script to fail with those model classes. - (the script initializes instances of model testers, in order to call\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44666",
+    "created_at": "2026-03-13T14:24:50Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44607/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44607",
+    "files_url": "https://github.com/huggingface/transformers/pull/44666/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44666",
     "labels": [],
     "merged": true,
-    "number": 44607,
+    "number": 44666,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Idefics3 without cache fix",
-    "updated_at": "2026-03-16T15:23:34Z"
+    "title": "Another (small) set of fixes required for tiny model creation",
+    "updated_at": "2026-03-13T17:20:52Z"
   },
   {
-    "additions": 26,
-    "author": "itazap",
+    "additions": 1,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "In v5, we enforce creating a model-specific tokenizer (ex. LlamaTokenizer, Qwen2Tokenizer, et .) object when specified. 1. For instance, when `tokenizer_class` is set in `tokenization_config.json` 2. Or when using the auto_mapped `tokenize\u2026",
+    "body_excerpt": "Dinov3 vit was refactored to introduce a module between top level and layers to have the capture decorators work as intended. Otherwise, it would force the backbone to do manual collection. This introduced a small conversion which is now a\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44606",
-    "created_at": "2026-03-11T17:29:12Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44665",
+    "created_at": "2026-03-13T13:50:26Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44606/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44606",
+    "files_url": "https://github.com/huggingface/transformers/pull/44665/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44665",
     "labels": [],
-    "merged": false,
-    "number": 44606,
+    "merged": true,
+    "number": 44665,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "optionally override tokenizer class with serialized tokenizer ",
-    "updated_at": "2026-03-17T16:03:19Z"
+    "state": "closed",
+    "title": "[`Chmv2`] Fix conversion after capture refactor",
+    "updated_at": "2026-03-13T14:28:33Z"
   },
   {
-    "additions": 564,
-    "author": "stevhliu",
+    "additions": 18,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "* refactors DeepSpeed ZeRO doc: - moves the troubleshooting section to the more general Debugging guide to keep everything in one place - moves the sequence parallelism section into a new doc to give it more visibility - update to be more\u2026",
-    "changed_files": 4,
+    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/44625 and https://github.com/huggingface/transformers/pull/44406#issuecomment-4203794413 We can create a clf model with LLM or VLM backbone like: ```python from transformers import A\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44605",
-    "created_at": "2026-03-11T17:26:12Z",
-    "deletions": 1163,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44664",
+    "created_at": "2026-03-13T13:39:52Z",
+    "deletions": 193,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44605/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44605",
+    "files_url": "https://github.com/huggingface/transformers/pull/44664/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44664",
     "labels": [],
-    "merged": true,
-    "number": 44605,
-    "review_comments_count": 6,
-    "state": "closed",
-    "title": "[docs] zero + sequence parallelism",
-    "updated_at": "2026-04-10T16:46:49Z"
+    "merged": false,
+    "number": 44664,
+    "review_comments_count": 8,
+    "state": "open",
+    "title": "Generic Sequence Classifier works for multimodal models",
+    "updated_at": "2026-04-14T09:50:30Z"
   },
   {
-    "additions": 415,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fix the quantization CI : - [x] autoround - [x] bnb - [x] fp_quant_integration - [x] metal - [x] mxfp4 - [x] quark_integration - [x] torchao_integration One major point in this PR is that I bump the min vers\u2026",
-    "changed_files": 19,
+    "additions": 1,
+    "author": "Ker102",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44661 \u2014 `transformers add-new-model-like` crashes with `AttributeError: 'ModelInfos' object has no attribute 'tokenizer_class'` when selecting a model that is in `TOKENIZER_MAPPING_NAMES`. ## Root Cause PR #40936 refactor\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 33,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44604",
-    "created_at": "2026-03-11T16:44:38Z",
-    "deletions": 912,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44663",
+    "created_at": "2026-03-13T13:25:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44604/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44604",
-    "labels": [],
-    "merged": true,
-    "number": 44604,
+    "files_url": "https://github.com/huggingface/transformers/pull/44663/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44663",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44663,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Bump torchao >=0.15 and fix quantization CI",
-    "updated_at": "2026-03-16T16:07:12Z"
+    "title": "fix: restore missing `tokenizer_class` attribute in `ModelInfos.__init__`",
+    "updated_at": "2026-03-13T14:02:00Z"
   },
   {
-    "additions": 17,
-    "author": "michalrzak",
+    "additions": 7084,
+    "author": "CyrilSterling",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? A quick fix that updates the Dockerfile to run on `arm64` systems (such as the NVIDIA Spark). The previous version of the Dockerfile fails on `arm64` systems due to `SudachiPy`, which only provides wheels for `x86_6\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This PR supports PenguinVL model. Paper: https://arxiv.org/abs/2603.06569 Github repo: https://github.com/tencent-ailab/Penguin-VL HuggingFace Model: https://huggingface.co/collections/tencent/ai-lab ## Before submi\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44603",
-    "created_at": "2026-03-11T16:42:30Z",
-    "deletions": 2,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44662",
+    "created_at": "2026-03-13T13:02:26Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44603/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44603",
+    "files_url": "https://github.com/huggingface/transformers/pull/44662/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44662",
     "labels": [],
     "merged": false,
-    "number": 44603,
-    "review_comments_count": 0,
+    "number": 44662,
+    "review_comments_count": 95,
     "state": "open",
-    "title": "fixed dockerfile for arm64 systems",
-    "updated_at": "2026-03-11T16:42:30Z"
+    "title": "[model] Add PenguinVL implementation",
+    "updated_at": "2026-04-01T10:53:25Z"
   },
   {
-    "additions": 218,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Follow up of https://github.com/huggingface/transformers/pull/44330 Also take the opportunity to simplify t5 and its children, because the way they compute`position_bias` was super convoluted/overc\u2026",
-    "changed_files": 61,
+    "additions": 5,
+    "author": "DogWala",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #44637 This PR makes the PEFT `load_best_model_at_end` path in `Trainer` use a CPU-first adapter reload path during best-model loading. Previously, when training a PEFT model, `Trainer` could reload the best a\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44602",
-    "created_at": "2026-03-11T16:19:43Z",
-    "deletions": 1083,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44660",
+    "created_at": "2026-03-13T12:59:25Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44602/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44602",
+    "files_url": "https://github.com/huggingface/transformers/pull/44660/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44660",
     "labels": [],
-    "merged": true,
-    "number": 44602,
-    "review_comments_count": 14,
-    "state": "closed",
-    "title": "Remove `cache_position` in more models (2)",
-    "updated_at": "2026-03-12T22:38:15Z"
+    "merged": false,
+    "number": 44660,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix: avoid late CUDA OOM in load_best_model_at_end with PEFT models",
+    "updated_at": "2026-03-13T13:18:52Z"
   },
   {
-    "additions": 510,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Goal is to be able to run a model with both PP and TP.",
-    "changed_files": 4,
+    "additions": 0,
+    "author": "Olexandr88",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Removes outdated use_diff entry from the docstring. The parameter is not present in the method signature or implementation.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44601",
-    "created_at": "2026-03-11T15:56:51Z",
-    "deletions": 2,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44601/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44601",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44659",
+    "created_at": "2026-03-13T11:08:13Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44659/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44659",
     "labels": [],
     "merged": false,
-    "number": 44601,
+    "number": 44659,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[Distributed] Add PP support natively",
-    "updated_at": "2026-03-12T11:53:24Z"
+    "title": "docs: remove outdated use_diff docstring from DistributedConfig.to_js\u2026",
+    "updated_at": "2026-03-13T13:42:29Z"
   },
   {
-    "additions": 0,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Small mistake in https://github.com/huggingface/transformers/pull/44432. cc @zucchini-nlp, was it intended to remove the scaling? (I assume so since the embedding now has the saling baked in, and I guess paligemma a\u2026",
-    "changed_files": 1,
+    "additions": 18,
+    "author": "Charly21r",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes an issue where the `.modelcard` attribute of a pipeline is not initialized. Without this fix, calling `save_pretrained` on a pipeline (e.g., ASR pipeline) raises an `AttributeError` because `.modelcard` does n\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44600",
-    "created_at": "2026-03-11T15:40:16Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44658",
+    "created_at": "2026-03-13T10:36:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44600/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44600",
+    "files_url": "https://github.com/huggingface/transformers/pull/44658/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44658",
     "labels": [],
-    "merged": true,
-    "number": 44600,
+    "merged": false,
+    "number": 44658,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove useless identity assignment",
-    "updated_at": "2026-03-12T10:21:23Z"
+    "title": "Fix: fix Pipeline's save_pretrained method (issue #44655)",
+    "updated_at": "2026-03-13T14:08:27Z"
   },
   {
-    "additions": 3,
-    "author": "hf-security-analysis[bot]",
+    "additions": 1,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Update `.github/workflows/self-comment-ci.yml` workflow configuration. cc @vasqu Closes huggingface/tracking-issues#17",
+    "body_excerpt": "@ArthurZucker @Cyrilvallez pls help review. Thx!",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44599",
-    "created_at": "2026-03-11T15:39:30Z",
-    "deletions": 161,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44657",
+    "created_at": "2026-03-13T10:25:07Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44599/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44599",
+    "files_url": "https://github.com/huggingface/transformers/pull/44657/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44657",
     "labels": [],
-    "merged": false,
-    "number": 44599,
+    "merged": true,
+    "number": 44657,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: update self-comment-ci.yml",
-    "updated_at": "2026-04-02T09:05:25Z"
+    "title": "fix bug embedding_size mismatch with hidden_size in electra model test",
+    "updated_at": "2026-04-01T08:24:54Z"
   },
   {
-    "additions": 1,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, updated to remove former members as well cc @yonigozlan @ydshieh",
-    "changed_files": 1,
+    "additions": 80,
+    "author": "juliendenize",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR add support to mistral-common v1.10.0. This involves: - reasoning effort feature - Python 3.14 Also add `image_sizes` in `apply_chat_template` output to match what is expected by standard processor.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44598",
-    "created_at": "2026-03-11T15:13:25Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44656",
+    "created_at": "2026-03-13T10:24:11Z",
+    "deletions": 15,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44598/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44598",
+    "files_url": "https://github.com/huggingface/transformers/pull/44656/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44656",
     "labels": [],
     "merged": true,
-    "number": 44598,
-    "review_comments_count": 0,
+    "number": 44656,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add Yoni to run-slow workflow",
-    "updated_at": "2026-03-11T15:38:10Z"
+    "title": "[MistralCommonBackend] Upgrade mistral-common to v1.10.0",
+    "updated_at": "2026-03-16T12:46:29Z"
   },
   {
-    "additions": 1,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Our beautiful Dashboard is missing ..... damm",
+    "additions": 13,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR fixes the AWQ test suite to align with the GPTQModel migration (following #41567 and #42776). ### Changes - **Fix `replace_with_awq_linear` return value**: The function now returns the model directly instead of a tuple `(model, _)`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44597",
-    "created_at": "2026-03-11T13:53:02Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44654",
+    "created_at": "2026-03-13T07:31:19Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44597/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44597",
+    "files_url": "https://github.com/huggingface/transformers/pull/44654/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44654",
     "labels": [],
     "merged": true,
-    "number": 44597,
+    "number": 44654,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix CircleCI summary report not showing due to missing dependency",
-    "updated_at": "2026-03-20T07:33:38Z"
+    "title": "Fix AWQ tests for GPTQModel migration",
+    "updated_at": "2026-03-13T16:28:14Z"
   },
   {
-    "additions": 26,
-    "author": "Desel72",
-    "author_association": "NONE",
-    "body_excerpt": "Fix TypeError when loading float8 models by falling back to bfloat16 in local_torch_dtype # What does this PR do? When loading FP8 models (e.g. `Qwen/Qwen3.5-35B-A3B-FP8`) with `dtype=\"auto\"`, the auto-detected dtype from checkpoint weight\u2026",
-    "changed_files": 2,
+    "additions": 18,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@zucchini-nlp, can you help review? Thx! unit tests to reproduce this bug: `tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py::Phi4MultimodalIntegrationTest::test_audio_text_generation`",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44596",
-    "created_at": "2026-03-11T13:03:19Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44653",
+    "created_at": "2026-03-13T07:14:25Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44596/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44596",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44653/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44653",
+    "labels": [],
     "merged": false,
-    "number": 44596,
-    "review_comments_count": 0,
+    "number": 44653,
+    "review_comments_count": 7,
     "state": "closed",
-    "title": "Fix TypeError when loading float8 models by falling back to bfloat16 in local_torch_dtype",
-    "updated_at": "2026-03-11T14:00:39Z"
+    "title": "Fix `AutoImageProcessor` to correctly detect local implementation whe\u2026",
+    "updated_at": "2026-04-09T02:32:53Z"
   },
   {
-    "additions": 2324,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add [CHMv2 ](https://arxiv.org/abs/2603.06382) to Transformers",
-    "changed_files": 23,
+    "additions": 8,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44623 `processor.save_pretrained()` in v5 only saves the unified `processor_config.json` with nested sub-processor configs, but does not save standalone config files like `preprocessor_config.json` for the image processor\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44595",
-    "created_at": "2026-03-11T12:38:44Z",
-    "deletions": 23,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44652",
+    "created_at": "2026-03-13T05:38:10Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44595/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44595",
+    "files_url": "https://github.com/huggingface/transformers/pull/44652/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44652",
     "labels": [
-      "New model",
-      "run-slow"
+      "Code agent slop"
     ],
-    "merged": true,
-    "number": 44595,
-    "review_comments_count": 30,
+    "merged": false,
+    "number": 44652,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add CHMv2",
-    "updated_at": "2026-03-11T16:00:03Z"
+    "title": "Fix processor.save_pretrained missing sub-processor config files",
+    "updated_at": "2026-03-13T12:03:37Z"
   },
   {
-    "additions": 271,
-    "author": "vimal-crypto",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What this PR does This PR brings `ObjectDetectionPipeline` in line with its sister pipelines (`ZeroShotObjectDetectionPipeline`, `ImageClassificationPipeline`) by adding four enhancements to the postprocessing stage. ### Changes **1. Sc\u2026",
+    "additions": 10,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44625 When passing `num_labels` to `AutoConfig.from_pretrained` for Qwen3.5, the value is set on the outer `Qwen3_5Config` but not propagated to `text_config`. This causes `AutoModelForSequenceClassification` to use the d\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44594",
-    "created_at": "2026-03-11T12:37:46Z",
-    "deletions": 40,
+    "cluster_id": "cluster-44625-9",
+    "cluster_ids": [
+      "cluster-44625-9"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44651",
+    "created_at": "2026-03-13T05:35:29Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44594/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44594",
+    "files_url": "https://github.com/huggingface/transformers/pull/44651/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44651",
     "labels": [],
     "merged": false,
-    "number": 44594,
+    "number": 44651,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[Pipeline] Add top_k, label filtering, box_format and score sorting to ObjectDetectionPipeline",
-    "updated_at": "2026-03-11T12:37:46Z"
+    "state": "closed",
+    "title": "Fix Qwen3.5 num_labels not propagated to text_config",
+    "updated_at": "2026-03-13T13:39:43Z"
   },
   {
-    "additions": 15,
-    "author": "BenjaminBossan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Multiple PEFT tests are failing due to recent changes in transformers. - hf_device_map attribute may not exist in some cases - respect inference_mode in load_adapter - new model loading requires changes for bnb (SCB\u2026",
-    "changed_files": 3,
+    "additions": 188,
+    "author": "shaealh",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Closes #44593 ## Summary - use generation_input_ids/generation_attention_mask when provided for decoder-only models - otherwise infer prompt from leading -100 labels and build left-padded prompt batch - return completion tokens for decoder\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44592",
-    "created_at": "2026-03-11T10:41:51Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44650",
+    "created_at": "2026-03-13T05:33:59Z",
     "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44592/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44592",
+    "files_url": "https://github.com/huggingface/transformers/pull/44650/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44650",
     "labels": [],
-    "merged": true,
-    "number": 44592,
+    "merged": false,
+    "number": 44650,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "FIX Multiple PEFT errors after v5 transition",
-    "updated_at": "2026-03-11T12:24:05Z"
+    "state": "open",
+    "title": "Fix Seq2SeqTrainer generation path for decoder-only models",
+    "updated_at": "2026-04-02T21:23:53Z"
   },
   {
-    "additions": 60,
-    "author": "moktamd",
+    "additions": 12,
+    "author": "gambletan",
     "author_association": "NONE",
-    "body_excerpt": "Adds `_apply_mps_fixes` in `sdpa_attention.py` to handle two upstream PyTorch MPS bugs: 1. **pytorch/pytorch#176767** (fixed in PyTorch 2.12): pads value tensor when `v_head_dim != q_head_dim` to avoid corrupted output. Affects DeepSeek mo\u2026",
+    "body_excerpt": "## Summary Fixes #44623 In v5.3.0, `ProcessorMixin.save_pretrained()` no longer calls `save_pretrained()` on non-tokenizer components (e.g. `image_processor`, `feature_extractor`). This means their individual config files (e.g. `preprocess\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44591",
-    "created_at": "2026-03-11T10:32:26Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44649",
+    "created_at": "2026-03-13T05:22:42Z",
+    "deletions": 207,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44591/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44591",
+    "files_url": "https://github.com/huggingface/transformers/pull/44649/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44649",
     "labels": [],
     "merged": false,
-    "number": 44591,
+    "number": 44649,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add MPS SDPA workarounds for value head dim and bidirectional attention",
-    "updated_at": "2026-03-11T13:37:15Z"
+    "title": "fix: ensure processor.save_pretrained saves all component files",
+    "updated_at": "2026-03-13T05:36:54Z"
   },
   {
-    "additions": 2,
-    "author": "pranay-3108",
+    "additions": 0,
+    "author": "gambletan",
     "author_association": "NONE",
-    "body_excerpt": "Fixes incorrect documentation for `position_ids` in `masking_utils.py`. The docstring previously described `position_ids` as `torch.Tensor`. This PR updates it to `torch.LongTensor` and aligns the description with the standard wording used\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44590",
-    "created_at": "2026-03-11T05:13:57Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44590/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44590",
-    "labels": [
-      "Code agent slop"
+    "body_excerpt": "## Summary Fixes #44625 When `num_labels` or `id2label` are passed to `Qwen3_5Config` (e.g. via `AutoConfig.from_pretrained(model_name, num_labels=1)`), they are only set on the outer composite config but **not forwarded to `text_config`**\u2026",
+    "changed_files": 0,
+    "cluster_id": "cluster-44625-9",
+    "cluster_ids": [
+      "cluster-44625-9"
     ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44648",
+    "created_at": "2026-03-13T05:22:03Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44648/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44648",
+    "labels": [],
     "merged": false,
-    "number": 44590,
+    "number": 44648,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix incorrect docstring for position_ids",
-    "updated_at": "2026-03-11T21:08:42Z"
+    "title": "fix: propagate num_labels to text_config in Qwen3.5",
+    "updated_at": "2026-03-13T05:33:26Z"
   },
   {
-    "additions": 1,
-    "author": "jiqing-feng",
+    "additions": 9,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes Llama4 model loading under BitsAndBytes (BNB) quantization mode. Router quantized incorrectly causes shape mismatch: Llama4Router inherits from nn.Linear, so BNB quantizes its weight into a packed format. However, super().forward() c\u2026",
+    "body_excerpt": "@remi-or @ArthurZucker @McPatate pls help review, thx!",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 15,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44588",
-    "created_at": "2026-03-11T01:42:33Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44647",
+    "created_at": "2026-03-13T04:55:26Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44588/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44588",
+    "files_url": "https://github.com/huggingface/transformers/pull/44647/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44647",
     "labels": [],
-    "merged": true,
-    "number": 44588,
-    "review_comments_count": 4,
+    "merged": false,
+    "number": 44647,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix llama4 bnb mode",
-    "updated_at": "2026-03-27T14:19:14Z"
+    "title": "Add more generic device support for continuous batching",
+    "updated_at": "2026-04-09T02:32:51Z"
   },
   {
-    "additions": 32,
-    "author": "kmbhattt-aws",
+    "additions": 4,
+    "author": "LincolnBurrows2017",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "body_excerpt": "Fixed typo in Qwen3.5 and Qwen3-VL-MoE model files ```diff - Since Qwen3.5 use timestamps to seperate videos + Since Qwen3.5 use timestamps to separate videos ```",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44587",
-    "created_at": "2026-03-11T01:01:18Z",
-    "deletions": 20,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44587/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44587",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44646",
+    "created_at": "2026-03-13T04:48:06Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44646/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44646",
     "labels": [],
     "merged": false,
-    "number": 44587,
+    "number": 44646,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix: Handling fused qkv result tensor slicing for tp sharded qkv weights",
-    "updated_at": "2026-03-12T21:31:29Z"
+    "title": "Fix typo: seperate -> separate",
+    "updated_at": "2026-03-13T15:27:08Z"
   },
   {
-    "additions": 91,
-    "author": "mvanhorn",
+    "additions": 4,
+    "author": "sywangyi",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Decouples router logits collection from output visibility in Mixtral's `ForCausalLM`. Previously, `output_router_logits=False` (the default) prevented `aux_loss` from being computed, meaning load balancing was silen\u2026",
-    "changed_files": 13,
+    "body_excerpt": "- Intel XPU: @IlyasMoutawwakil ``` ======================================================================== FAILURES ======================================================================== _________________________________________________\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44586",
-    "created_at": "2026-03-11T00:24:07Z",
-    "deletions": 39,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44645",
+    "created_at": "2026-03-13T02:53:31Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44586/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44586",
+    "files_url": "https://github.com/huggingface/transformers/pull/44645/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44645",
     "labels": [],
-    "merged": false,
-    "number": 44586,
+    "merged": true,
+    "number": 44645,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Mixtral aux_loss not computed when output_router_logits=False",
-    "updated_at": "2026-03-11T14:31:21Z"
+    "title": "fix Image.open failure in case \"tests/models/prompt_depth_anything/te\u2026",
+    "updated_at": "2026-03-27T11:11:05Z"
   },
   {
-    "additions": 10,
-    "author": "mvanhorn",
+    "additions": 2,
+    "author": "sywangyi",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Passes `eps=config.rms_norm_eps` to both `q_a_layernorm` and `kv_a_layernorm` in the DeepseekV3 MLA attention module. Without this, these layernorms default to `eps=1e-5` instead of the config value (`1e-6`), causin\u2026",
-    "changed_files": 5,
+    "body_excerpt": "\u2026MXFP4PseudoquantTest::test_quantized_model fail in xpu ## Who can review? - quantization: @SunMarc - Intel XPU: @IlyasMoutawwakil",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44585",
-    "created_at": "2026-03-11T00:20:54Z",
-    "deletions": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44644",
+    "created_at": "2026-03-13T02:02:45Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44585/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44585",
+    "files_url": "https://github.com/huggingface/transformers/pull/44644/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44644",
+    "labels": [],
+    "merged": true,
+    "number": 44644,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "fix tests/quantization/fp_quant_integration/test_fp_quant.py::FPQuant\u2026",
+    "updated_at": "2026-03-27T14:14:13Z"
+  },
+  {
+    "additions": 5,
+    "author": "joshuaswanson",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "The generic `labels` docstring in `ModelArgs` says \"masked language modeling loss\" and doesn't mention that causal LM models shift labels internally. This has tripped up a lot of users who pre-shift their labels and end up training next-ne\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44642",
+    "created_at": "2026-03-12T23:47:11Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44642/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44642",
     "labels": [],
     "merged": false,
-    "number": 44585,
+    "number": 44642,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix missing rms_norm_eps in DeepseekV3 MLA layernorms",
-    "updated_at": "2026-03-12T14:39:12Z"
+    "title": "Clarify that causal LM labels are shifted internally",
+    "updated_at": "2026-03-13T00:02:30Z"
   },
   {
     "additions": 1,
-    "author": "mvanhorn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes an off-by-one error in `decode_spans()` in the document question answering pipeline that causes a `ValueError: kth(=N) out of bounds` crash when `len(scores_flat) == topk`. The boundary check on line 97 uses `\u2026",
+    "author": "kmbhattt-aws",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Issue: A full 4D attention mask of shape `[1, 1, seq_len, seq_len]` is being created during attention, even when not using alibi for positional embeddings. - This occupied extra memory during training. Root Cause: T\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44584",
-    "created_at": "2026-03-10T23:52:51Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44641",
+    "created_at": "2026-03-12T23:01:11Z",
     "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44641/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44641",
+    "labels": [],
+    "merged": false,
+    "number": 44641,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Conditinally passing and_mask_function arg to create_causal_mask ",
+    "updated_at": "2026-03-13T02:09:22Z"
+  },
+  {
+    "additions": 11,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - Add `\"embedding_colwise\"` tp plan - Add register methods for `ParallelInterface`. Without it, we can register plans with the register method, but not the corresponding `plan_to_weight_dim` and `plan_to_bias_dim`.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44640",
+    "created_at": "2026-03-12T20:14:06Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44584/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44584",
+    "files_url": "https://github.com/huggingface/transformers/pull/44640/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44640",
     "labels": [],
     "merged": true,
-    "number": 44584,
+    "number": 44640,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix off-by-one in decode_spans boundary check",
-    "updated_at": "2026-03-12T13:22:10Z"
+    "title": "Add register method for `ParallelInterface`",
+    "updated_at": "2026-03-13T18:57:48Z"
   },
   {
-    "additions": 45,
-    "author": "wilnn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026kpoint when `save_strategy` is `best` # What does this PR do? fix load_best_model_checkpoint_at_end do not load the best model checkpoint at the end when `save_strategy` is `\"best\"` Fixes # (issue) fix load_best_model_checkpoint_at_end do\u2026",
-    "changed_files": 5,
+    "additions": 24099,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary Fixes bugs introduced during the `__init__` \u2192 `@dataclass` conversion in #41250. All are incorrect default values caught by automated comparison of `__init__` signatures (main) vs dataclass fields (PR). | Model | Param | Was | S\u2026",
+    "changed_files": 931,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44583",
-    "created_at": "2026-03-10T22:37:36Z",
-    "deletions": 16,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44639",
+    "created_at": "2026-03-12T16:49:54Z",
+    "deletions": 38773,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44583/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44583",
+    "files_url": "https://github.com/huggingface/transformers/pull/44639/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44639",
     "labels": [],
-    "merged": true,
-    "number": 44583,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 44639,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix load_best_model_checkpoint_at_end do not load the best model chec\u2026",
-    "updated_at": "2026-03-24T15:42:26Z"
+    "title": "Fix incorrect default values in config dataclass migration",
+    "updated_at": "2026-03-12T16:50:10Z"
   },
   {
-    "additions": 3,
-    "author": "yonigozlan",
+    "additions": 19,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix an issue introduced in #42564 . The refactor embedded raw image tokens instead of BPE tokens, causing the model to output gibberish. This fix adds back the image tokens to BPE tokens conversion before embedding.\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/44614. This one is the result of a long debugging session and discussion with @vasqu. The issue is as follow: - Backbone ALWAYS need to c\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44582",
-    "created_at": "2026-03-10T21:00:30Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44638",
+    "created_at": "2026-03-12T16:19:49Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44582/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44582",
+    "files_url": "https://github.com/huggingface/transformers/pull/44638/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44638",
     "labels": [],
     "merged": true,
-    "number": 44582,
-    "review_comments_count": 0,
+    "number": 44638,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix missing BPE token conversion step in Chameleon",
-    "updated_at": "2026-03-11T11:26:49Z"
+    "title": "Fix output capturing for Backbones",
+    "updated_at": "2026-03-12T17:11:32Z"
   },
   {
-    "additions": 9,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issues were identified and fixed in this PR: \u2192 **Llama-4 Vision:** [freqs_ci is stored as a plain attr](https://github.com/huggingface/transformers/blob/153894c013/src/transformers/models/llama4/mode\u2026",
-    "changed_files": 2,
+    "additions": 571,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - Adds an `httpx` tracer to gather metrics about network calls - Collect and store metrics and generates an artifact in CI - Can be used locally with `DEBUG_NETWORK` - Activated in CircleCI example of local run: ```\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44581",
-    "created_at": "2026-03-10T19:33:51Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44636",
+    "created_at": "2026-03-12T15:25:10Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44581/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44581",
+    "files_url": "https://github.com/huggingface/transformers/pull/44636/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44636",
     "labels": [],
     "merged": true,
-    "number": 44581,
-    "review_comments_count": 2,
+    "number": 44636,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "fix(models, testing): Fix Llama4 vision rotary meta tensor initialization and MyT5 get_tokenizer signature",
-    "updated_at": "2026-03-13T16:13:09Z"
+    "title": "feat(ci): added a network debug report",
+    "updated_at": "2026-03-18T19:19:03Z"
   },
   {
-    "additions": 16,
-    "author": "rabbierabbie",
-    "author_association": "NONE",
-    "body_excerpt": "This PR clarifies references to the **Transformers library** in the README. While reading the documentation as a new user, I initially found the term **\"Transformers\"** ambiguous because it could refer either to the **Transformer architect\u2026",
-    "changed_files": 1,
+    "additions": 111,
+    "author": "RyanMullins",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Per a conversion with @Cyrilvallez on Slack on how to make Gemma models more compatible with modular inheritance, this PR: * Updates Gemma models to use `nn.parameter.Buffer` instead of a `self.register_buffer()` fo\u2026",
+    "changed_files": 32,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44580",
-    "created_at": "2026-03-10T18:21:55Z",
-    "deletions": 16,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44635",
+    "created_at": "2026-03-12T14:47:46Z",
+    "deletions": 87,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44580/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44580",
+    "files_url": "https://github.com/huggingface/transformers/pull/44635/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44635",
     "labels": [],
     "merged": false,
-    "number": 44580,
+    "number": 44635,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Clarify references to the Transformers library in README",
-    "updated_at": "2026-03-11T13:24:58Z"
+    "state": "open",
+    "title": "[Gemma] Modular-friendly buffers",
+    "updated_at": "2026-03-18T10:44:25Z"
   },
   {
-    "additions": 1,
-    "author": "ydshieh",
+    "additions": 30,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? We need to fetch the specific commit (the so called merge commit created by Github itself)",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44579",
-    "created_at": "2026-03-10T16:58:09Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44634",
+    "created_at": "2026-03-12T14:04:36Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44579/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44579",
+    "files_url": "https://github.com/huggingface/transformers/pull/44634/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44634",
     "labels": [],
     "merged": true,
-    "number": 44579,
-    "review_comments_count": 0,
+    "number": 44634,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix PR comment CI for quantization job",
-    "updated_at": "2026-03-10T17:07:11Z"
+    "title": "Fix lfm2 kernel path",
+    "updated_at": "2026-03-12T15:00:59Z"
   },
   {
-    "additions": 449,
-    "author": "remi-or",
+    "additions": 26,
+    "author": "eustlb",
     "author_association": "MEMBER",
-    "body_excerpt": "# Summary This PR adds three attributes to the compile config, to have granularity over how varlen (handles mixed prefil and decode batches) and decode (only decode batches) are compiled. We want to have this kind of granularity because va\u2026",
-    "changed_files": 11,
+    "body_excerpt": "# What does this PR do? As per title \ud83e\udd17",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44578",
-    "created_at": "2026-03-10T16:31:20Z",
-    "deletions": 121,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44633",
+    "created_at": "2026-03-12T13:35:48Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44578/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44578",
+    "files_url": "https://github.com/huggingface/transformers/pull/44633/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44633",
     "labels": [],
     "merged": true,
-    "number": 44578,
-    "review_comments_count": 4,
+    "number": 44633,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[CB] Better parametrization for compile",
-    "updated_at": "2026-03-19T11:50:08Z"
+    "title": "[medasr] doc update",
+    "updated_at": "2026-03-16T09:39:50Z"
   },
   {
-    "additions": 5,
-    "author": "tarekziade",
+    "additions": 35,
+    "author": "Abdennacer-Badaoui",
     "author_association": "MEMBER",
-    "body_excerpt": "- Fix quantizer_aqlm.py to use renamed modules_to_not_convert parameter instead of removed linear_weights_not_to_quantize - Update test to match new function signature: no tuple return, module names instead of weight names",
-    "changed_files": 2,
+    "body_excerpt": "Summary - Update test expectations for affected models - Add some needed dependencies - Fix TypeError: `GenerationMixin.prepare_inputs_for_generation()` got multiple values for argument 'next_sequence_length' in Qwen2.5-Omni talker by pass\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44577",
-    "created_at": "2026-03-10T15:57:36Z",
-    "deletions": 5,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44632",
+    "created_at": "2026-03-12T13:32:33Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44577/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44577",
+    "files_url": "https://github.com/huggingface/transformers/pull/44632/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44632",
     "labels": [],
     "merged": true,
-    "number": 44577,
-    "review_comments_count": 0,
+    "number": 44632,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Fix: AQLM quantizer to match updated replace_with_aqlm_linear signature",
-    "updated_at": "2026-03-10T17:48:00Z"
+    "title": "[AMD CI] Fix test failures across important models ",
+    "updated_at": "2026-03-17T14:58:10Z"
   },
   {
-    "additions": 16,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44387. This PR disable async loading when we want to quantize the model. it is actually faster than doing a semaphore. If a quantizer happens to quantize fast\u2026",
-    "changed_files": 1,
+    "additions": 33,
+    "author": "RyanMullins",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Updates the weights conversion scripts for Gemma to: * Use the new `SentencePieceExtractor` class to get the vocab and merges from the SPM * Always initialize and save the unified `GemmaTokenizer` class ## Before su\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44576",
-    "created_at": "2026-03-10T15:07:01Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44631",
+    "created_at": "2026-03-12T13:32:25Z",
+    "deletions": 45,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44576/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44576",
+    "files_url": "https://github.com/huggingface/transformers/pull/44631/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44631",
     "labels": [],
     "merged": true,
-    "number": 44576,
-    "review_comments_count": 1,
+    "number": 44631,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Disable async loading when quantizing on the fly",
-    "updated_at": "2026-03-16T16:36:42Z"
+    "title": "[Gemma] Update conversion scripts for Transformers v5 Comaptibility",
+    "updated_at": "2026-03-18T10:39:53Z"
   },
   {
-    "additions": 13,
-    "author": "Rocketknight1",
+    "additions": 42,
+    "author": "MaybeSam05",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a bug where `num_labels` passed to `AutoConfig.from_pretrained` for Qwen3.5 did not propagate from the top\u2011level `Qwen3_5Config` into the `text_config`, so `AutoModelForSequenceClassification` still saw the de\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44625-9",
+    "cluster_ids": [
+      "cluster-44625-9"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44630",
+    "created_at": "2026-03-12T13:25:54Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44630/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44630",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44630,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix Qwen3.5 num_labels propagation to text_config (fix #44625)",
+    "updated_at": "2026-03-12T13:46:07Z"
+  },
+  {
+    "additions": 15,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "Some parameters in Tapas are initialized in `__init__()` and not reinitialized in `_init_weights()`, which means that if the model is created on the `meta` device, those parameters do not get a weight initialization. This causes a crash la\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Fixes \"auto\" dtype when the model is initialized `from_config` It was already fixed for `from_pretrained` in https://github.com/huggingface/transformers/pull/42990 but vLLM creates models with `AutoModel._from_confi\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44575",
-    "created_at": "2026-03-10T14:42:40Z",
-    "deletions": 16,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44629",
+    "created_at": "2026-03-12T13:07:55Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44575/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44575",
+    "files_url": "https://github.com/huggingface/transformers/pull/44629/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44629",
     "labels": [],
     "merged": true,
-    "number": 44575,
-    "review_comments_count": 0,
+    "number": 44629,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Correct Tapas initialization",
-    "updated_at": "2026-03-10T15:14:37Z"
+    "title": "Ensure same `dtype` for subconfig when `_from_config`",
+    "updated_at": "2026-03-13T11:35:10Z"
   },
   {
-    "additions": 33,
-    "author": "ArthurZucker",
+    "additions": 37,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "Fixes #44448",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? - `encoder_config` and `decoder_config` should return `None` for encoder / decoder config classes themselves. - The encoder / decoder model classes should have the correct config classes associated to them <!-- CURS\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44571",
-    "created_at": "2026-03-10T14:28:22Z",
-    "deletions": 12,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44628",
+    "created_at": "2026-03-12T12:24:44Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44571/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44571",
+    "files_url": "https://github.com/huggingface/transformers/pull/44628/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44628",
     "labels": [],
     "merged": true,
-    "number": 44571,
-    "review_comments_count": 0,
+    "number": 44628,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix pegasus conversion",
-    "updated_at": "2026-03-18T09:55:00Z"
+    "title": "Fix for `VibeVoiceAcousticTokenizer`",
+    "updated_at": "2026-03-12T13:33:02Z"
   },
   {
-    "additions": 30,
-    "author": "umbilnm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? In transformers v5, `DebertaV2Tokenizer` was rewritten to use `TokenizersBackend`, but the `post_processor` responsible for adding `[CLS]`/`[SEP]` tokens was never set. This causes `add_special_tokens=True` to silen\u2026",
-    "changed_files": 2,
+    "additions": 141,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The diff in revert mapping is needed, otherwise we get failures in a few models, see https://app.circleci.com/pipelines/github/huggingface/transformers/167425/workflows/fa96efe5-f810-408e-bafd-de03b7e881aa/jobs/2208\u2026",
+    "changed_files": 78,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44570",
-    "created_at": "2026-03-10T13:37:06Z",
-    "deletions": 1,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44627",
+    "created_at": "2026-03-12T12:00:31Z",
+    "deletions": 367,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44570/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44570",
+    "files_url": "https://github.com/huggingface/transformers/pull/44627/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44627",
     "labels": [],
-    "merged": true,
-    "number": 44570,
-    "review_comments_count": 1,
+    "merged": true,
+    "number": 44627,
+    "review_comments_count": 8,
     "state": "closed",
-    "title": "Fix missing post_processor in DebertaV2Tokenizer causing no special t\u2026",
-    "updated_at": "2026-03-24T09:40:44Z"
+    "title": "Move VLM conversions to the main mapping",
+    "updated_at": "2026-03-17T10:13:03Z"
   },
   {
-    "additions": 267,
-    "author": "aashay-sarvam",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds native support for the `sarvam_mla` model type (`sarvamai/sarvam-105b`) to HuggingFace Transformers using the **modular pattern**, inheriting from DeepSeek V3. ### Model Architecture SarvamMLA is a **105B para\u2026",
-    "changed_files": 10,
+    "additions": 11,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds a missing branch. I don't really know if this is worth it, can't find a model online that enforces the flag to `True`",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44569",
-    "created_at": "2026-03-10T11:55:01Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44626",
+    "created_at": "2026-03-12T11:23:21Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44569/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44569",
+    "files_url": "https://github.com/huggingface/transformers/pull/44626/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44626",
     "labels": [],
     "merged": false,
-    "number": 44569,
-    "review_comments_count": 20,
+    "number": 44626,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "Add SarvamMLA model (sarvamai/sarvam-105b)",
-    "updated_at": "2026-03-18T13:58:18Z"
+    "title": "don't break legacy behavior when enforced!",
+    "updated_at": "2026-03-12T11:32:46Z"
   },
   {
-    "additions": 2,
-    "author": "tomaarsen",
+    "additions": 34,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Currently, when using Apertus (or rather, it's `XIELUActivation`), and you don't have `xielu` installed, then you'll fall to this `except:` https://github.com/huggingface/transformers/blob/5a098a1e01034095f037c8a37f\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Follow-up of #44549",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44567",
-    "created_at": "2026-03-10T11:39:20Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44624",
+    "created_at": "2026-03-12T09:26:17Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44567/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44567",
+    "files_url": "https://github.com/huggingface/transformers/pull/44624/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44624",
     "labels": [],
     "merged": true,
-    "number": 44567,
+    "number": 44624,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`fix`] Prevent crash with Apertus without xielu installed",
-    "updated_at": "2026-03-10T13:24:11Z"
+    "title": "Fix more wrong HF hub checkpoint names",
+    "updated_at": "2026-03-12T09:59:12Z"
   },
   {
-    "additions": 181,
-    "author": "tarekziade",
+    "additions": 17,
+    "author": "LysandreJik",
     "author_association": "MEMBER",
-    "body_excerpt": "This patch extends `ty` check to `src/transformers/cli` Based on https://github.com/huggingface/transformers/pull/44412",
-    "changed_files": 8,
+    "body_excerpt": "CB temporarily disabled on non-cuda devices as it's currently using cuda streams by default.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44566",
-    "created_at": "2026-03-10T10:40:13Z",
-    "deletions": 86,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44622",
+    "created_at": "2026-03-12T08:11:10Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44566/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44566",
+    "files_url": "https://github.com/huggingface/transformers/pull/44622/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44622",
     "labels": [],
     "merged": false,
-    "number": 44566,
-    "review_comments_count": 21,
+    "number": 44622,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "chore(typing): extend typing to `src/transformers/cli` ",
-    "updated_at": "2026-04-01T16:05:57Z"
+    "title": "CB temporarily disabled on non-cuda devices",
+    "updated_at": "2026-03-16T00:00:20Z"
   },
   {
-    "additions": 36,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. The unused memory is taken into account too late, which leads to different device_map for the same hardware and models, and even random cuda OOM!! Basically, the max memory needs to be adjusted BEF\u2026",
+    "additions": 0,
+    "author": "KoichiYasuoka",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #43170 for `modelcard` removal Quick reproduce: ``` from transformers import pipeline fmp=pipeline(\"fill-mask\",\"google-bert/bert-base-cased\") fmp.save_pretrained(\"tmpdir\") ``` ## Before submitting - [ ] This P\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44565",
-    "created_at": "2026-03-10T10:31:10Z",
-    "deletions": 22,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44621",
+    "created_at": "2026-03-12T08:04:29Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44565/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44565",
+    "files_url": "https://github.com/huggingface/transformers/pull/44621/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44621",
     "labels": [],
     "merged": true,
-    "number": 44565,
-    "review_comments_count": 2,
+    "number": 44621,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[device_map] Fix device_map computation by correctly adjusting memory available",
-    "updated_at": "2026-03-10T17:16:01Z"
+    "title": "pipelines do not have modelcard",
+    "updated_at": "2026-03-13T14:28:48Z"
   },
   {
-    "additions": 2,
-    "author": "ArthurZucker",
+    "additions": 15,
+    "author": "LysandreJik",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes #44360",
-    "changed_files": 2,
-    "cluster_id": "cluster-44360-6",
-    "cluster_ids": [
-      "cluster-44360-6"
-    ],
-    "cluster_role": "canonical",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44564",
-    "created_at": "2026-03-10T10:14:21Z",
-    "deletions": 2,
+    "body_excerpt": "FastAPI doesn't play well with `from __future__ import annotations`. This PR reverts this change and correctly guards against unprotected optional imports. Reverts https://github.com/huggingface/transformers/pull/44256",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44620",
+    "created_at": "2026-03-12T07:56:55Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44564/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44564",
+    "files_url": "https://github.com/huggingface/transformers/pull/44620/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44620",
     "labels": [],
     "merged": true,
-    "number": 44564,
+    "number": 44620,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix glm dsa",
-    "updated_at": "2026-03-19T15:13:36Z"
+    "title": "Fix transformers serve's 422 unprocessable entity",
+    "updated_at": "2026-03-16T13:41:44Z"
   },
   {
-    "additions": 165,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Split out `mm_token_type_id` creation to a separate utility and just call it in VLMs. Also make sure that `mm_token_type_id` can be created even when `padding=False` and the inputs are of different length. As long a\u2026",
-    "changed_files": 37,
+    "additions": 43,
+    "author": "yunhaoli24",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Set `add_bos_token=True` and `add_eos_token=True` by default in `DebertaV2Tokenizer` to fix the regression where `add_special_tokens=True` doesn't add BOS/EOS tokens for `microsoft/mdeberta-v3-base` tokenizer in transformers >=5\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44563",
-    "created_at": "2026-03-10T10:13:29Z",
-    "deletions": 267,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44618",
+    "created_at": "2026-03-12T04:46:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44563/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44563",
+    "files_url": "https://github.com/huggingface/transformers/pull/44618/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44618",
     "labels": [],
-    "merged": true,
-    "number": 44563,
-    "review_comments_count": 4,
+    "merged": false,
+    "number": 44618,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Allow `mm_token_type` be non-padded lists ",
-    "updated_at": "2026-03-25T11:33:46Z"
+    "title": "fix: Add BOS/EOS tokens by default for DeBERTa v2 tokenizer",
+    "updated_at": "2026-03-16T05:28:25Z"
   },
   {
-    "additions": 54,
-    "author": "SamArun28",
+    "additions": 7,
+    "author": "s-zx",
     "author_association": "NONE",
-    "body_excerpt": "#Standardizing the BERT model card as part of issue #36979 Changes made: - Added friendly description of BERT - Added Pipeline and AutoModel code examples - Added Notes section with helpful tips - Added Resources section with links @stevhl\u2026",
+    "body_excerpt": "## Summary Add fallback to bfloat16 when Float8 dtype fails to set, preventing TypeError when loading FP8 models on PyTorch builds without Float8_e4m3fnStorage support. ## Root Cause `torch.set_default_dtype(dtype)` raises `TypeError: coul\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44616",
+    "created_at": "2026-03-11T23:00:15Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44616/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44616",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44616,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: add Float8 dtype fallback in modeling_utils.py",
+    "updated_at": "2026-03-18T16:02:54Z"
+  },
+  {
+    "additions": 35,
+    "author": "MaybeSam05",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Restores `is_torch_fx_available` in `transformers.utils.import_utils` as a backwards-compatibility shim so that Hub models loaded with `trust_remote_code=True` that still import this symbol no longer raise `ImportEr\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44562",
-    "created_at": "2026-03-10T09:58:19Z",
-    "deletions": 111,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44615",
+    "created_at": "2026-03-11T22:52:23Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44562/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44562",
+    "files_url": "https://github.com/huggingface/transformers/pull/44615/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44615",
     "labels": [],
     "merged": false,
-    "number": 44562,
+    "number": 44615,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "docs: standardize BERT model card",
-    "updated_at": "2026-03-10T16:00:43Z"
+    "state": "open",
+    "title": "Restore is_torch_fx_available for trust_remote_code backwards compatibility",
+    "updated_at": "2026-03-12T10:33:43Z"
+  },
+  {
+    "additions": 19,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Only detected in #43590, so it can only be detected there for `test_sdpa_can_compile_dynamic` (`lw_detr`). Core issue: Dynamo can cache the attribute and ignore it across frames which means that updates/reads are not working as expected. T\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44614",
+    "created_at": "2026-03-11T20:49:51Z",
+    "deletions": 22,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44614/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44614",
+    "labels": [],
+    "merged": false,
+    "number": 44614,
+    "review_comments_count": 10,
+    "state": "closed",
+    "title": "[`Compile`] Fix capture outputs during compile",
+    "updated_at": "2026-03-13T02:15:21Z"
   },
   {
-    "additions": 368,
-    "author": "rain-1",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "> Adds support for the legacy text completions endpoint, which accepts a freeform text prompt (no chat template) and returns generated text in choices[].text. Supports both streaming and non-streaming modes, suffix for fill-in-the-middle i\u2026",
+    "additions": 105,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "adds docs for tensor parallelism for training",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44558",
-    "created_at": "2026-03-10T07:09:07Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44613",
+    "created_at": "2026-03-11T20:43:53Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44558/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44558",
+    "files_url": "https://github.com/huggingface/transformers/pull/44613/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44613",
     "labels": [],
-    "merged": false,
-    "number": 44558,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Add /v1/completions endpoint (OpenAI legacy completions API) to `transformers serve`",
-    "updated_at": "2026-04-06T09:23:32Z"
+    "merged": true,
+    "number": 44613,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[docs] tp training",
+    "updated_at": "2026-04-09T22:20:47Z"
   },
   {
     "additions": 1,
-    "author": "black-yt",
-    "author_association": "NONE",
-    "body_excerpt": "This PR fixes a runtime `TypeError` encountered during model initialization when using Qwen3.5 configurations with recent `transformers` versions. The error occurs in `modeling_rope_utils.py` during RoPE parameter validation: ``` TypeError\u2026",
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The output function hook in `EmbeddingParallel` casts the mask to fp32. It breaks things for neuron devices. Suggested fix: cast to the outputs' dtype.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 16,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44555",
-    "created_at": "2026-03-10T01:52:18Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44612",
+    "created_at": "2026-03-11T20:09:41Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44555/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44555",
+    "files_url": "https://github.com/huggingface/transformers/pull/44612/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44612",
     "labels": [],
-    "merged": false,
-    "number": 44555,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44612,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Fix TypeError in RoPE validation when ignore_keys_at_rope_validation is a list",
-    "updated_at": "2026-03-28T10:04:35Z"
+    "title": "fix: cast to proper dtype in EmbeddingParallel",
+    "updated_at": "2026-03-12T21:08:04Z"
   },
   {
-    "additions": 233,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, wip and really unsure if we really want this",
-    "changed_files": 16,
+    "additions": 15,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 [This PR (\ud83d\udea8 Delete duplicate code in backbone utils)](https://github.com/huggingface/transformers/pull/43323) structured config loading to use [BackboneMi\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44553",
-    "created_at": "2026-03-10T01:04:49Z",
-    "deletions": 263,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44553/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44553",
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44611",
+    "created_at": "2026-03-11T20:02:14Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44611/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44611",
     "labels": [],
-    "merged": false,
-    "number": 44553,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "[`FA`] Refactor FA CB kwargs",
-    "updated_at": "2026-03-17T09:14:21Z"
+    "merged": true,
+    "number": 44611,
+    "review_comments_count": 12,
+    "state": "closed",
+    "title": "fix(models): Forward timm model kwargs to timm.create_model for OmDet-Turbo",
+    "updated_at": "2026-03-13T11:57:20Z"
   },
   {
     "additions": 0,
-    "author": "ydshieh",
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "Reverts huggingface/transformers#44529",
+    "body_excerpt": "# What does this PR do? By default, `initialize_tensor_parallelism` hides stdout and stderr for ranks > 0. While convenient, this is not perfect for dev and debugging. I suggest we simply add a flag to be able to disable this feature if wa\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44552",
-    "created_at": "2026-03-09T21:05:46Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44608",
+    "created_at": "2026-03-11T18:57:01Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44552/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44552",
+    "files_url": "https://github.com/huggingface/transformers/pull/44608/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44608",
     "labels": [],
     "merged": true,
-    "number": 44552,
-    "review_comments_count": 0,
+    "number": 44608,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Revert \"test merge queue 1\"",
-    "updated_at": "2026-03-09T21:15:55Z"
+    "title": "Allow to disable stdout hiding for TP",
+    "updated_at": "2026-03-12T19:36:06Z"
   },
   {
-    "additions": 12,
-    "author": "echarlaix",
-    "author_association": "MEMBER",
-    "body_excerpt": "`self.rotary_emb` is always called since https://github.com/huggingface/transformers/pull/39847 while only being initialized when `config.use_mem_rope` is True inference failing since v5 for models `config.use_mem_rope=False` ``` Attribute\u2026",
-    "changed_files": 2,
+    "additions": 9,
+    "author": "gabe-l-hart",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR fixes generation for models using the `Idefics3ForConditionalGeneration` architecture with `use_cache=False`. ## Testing <details> <summary>docling_repro.py</summary> ```py import os import torch import time\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44551",
-    "created_at": "2026-03-09T18:15:17Z",
-    "deletions": 2,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44607",
+    "created_at": "2026-03-11T18:41:58Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44551/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44551",
+    "files_url": "https://github.com/huggingface/transformers/pull/44607/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44607",
     "labels": [],
     "merged": true,
-    "number": 44551,
+    "number": 44607,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix zamba2 rotary embedding call when use_mem_rope is False",
-    "updated_at": "2026-03-10T11:43:36Z"
+    "title": "Idefics3 without cache fix",
+    "updated_at": "2026-03-16T15:23:34Z"
   },
   {
-    "additions": 6,
-    "author": "himani27301",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Improved wording and grammar in the Auto Classes documentation to enhance readability without changing functionality. # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, you\u2026",
+    "additions": 26,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "In v5, we enforce creating a model-specific tokenizer (ex. LlamaTokenizer, Qwen2Tokenizer, et .) object when specified. 1. For instance, when `tokenizer_class` is set in `tokenization_config.json` 2. Or when using the auto_mapped `tokenize\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44550",
-    "created_at": "2026-03-09T16:12:59Z",
-    "deletions": 16,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44606",
+    "created_at": "2026-03-11T17:29:12Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44550/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44550",
+    "files_url": "https://github.com/huggingface/transformers/pull/44606/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44606",
     "labels": [],
     "merged": false,
-    "number": 44550,
-    "review_comments_count": 1,
+    "number": 44606,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Improve clarity and grammar in Auto Classes documentation",
-    "updated_at": "2026-03-09T16:32:29Z"
+    "title": "optionally override tokenizer class with serialized tokenizer ",
+    "updated_at": "2026-03-17T16:03:19Z"
   },
   {
-    "additions": 158,
-    "author": "ydshieh",
+    "additions": 564,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The previous values are simply wrong. (which also cause problems for tiny model creation)",
-    "changed_files": 51,
+    "body_excerpt": "* refactors DeepSpeed ZeRO doc: - moves the troubleshooting section to the more general Debugging guide to keep everything in one place - moves the sequence parallelism section into a new doc to give it more visibility - update to be more\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44549",
-    "created_at": "2026-03-09T15:44:36Z",
-    "deletions": 126,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44605",
+    "created_at": "2026-03-11T17:26:12Z",
+    "deletions": 1163,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44549/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44549",
+    "files_url": "https://github.com/huggingface/transformers/pull/44605/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44605",
     "labels": [],
     "merged": true,
-    "number": 44549,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix wrong (non-existing) checkpoints",
-    "updated_at": "2026-03-11T15:16:56Z"
-  },
-  {
-    "additions": 12,
-    "author": "mvanhorn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #43935 ## Summary - Added `eval_on_end` argument to `TrainingArguments` (default: `False`) - Added conditional evaluation at the end of training in `Trainer.train()`, symmetric to `eval_on_start` - Implementation mirrors the existing\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44548",
-    "created_at": "2026-03-09T15:03:27Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44548/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44548",
-    "labels": [],
-    "merged": false,
-    "number": 44548,
-    "review_comments_count": 0,
+    "number": 44605,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Add eval_on_end flag to Trainer",
-    "updated_at": "2026-03-09T19:31:59Z"
+    "title": "[docs] zero + sequence parallelism",
+    "updated_at": "2026-04-10T16:46:49Z"
   },
   {
-    "additions": 2,
-    "author": "mvanhorn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44373 ## Summary - Corrected the docstring for `position_ids` parameter in `prepare_fa_kwargs_from_position_ids` and `_prepare_from_posids` which incorrectly described attention mask semantics (\"Boolean or int tensor... 1 means vali\u2026",
-    "changed_files": 1,
+    "additions": 415,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fix the quantization CI : - [x] autoround - [x] bnb - [x] fp_quant_integration - [x] metal - [x] mxfp4 - [x] quark_integration - [x] torchao_integration One major point in this PR is that I bump the min vers\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44547",
-    "created_at": "2026-03-09T14:59:27Z",
-    "deletions": 2,
+    "comments_count": 33,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44604",
+    "created_at": "2026-03-11T16:44:38Z",
+    "deletions": 912,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44547/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44547",
+    "files_url": "https://github.com/huggingface/transformers/pull/44604/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44604",
     "labels": [],
-    "merged": false,
-    "number": 44547,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Fix position_ids docstring in modeling_flash_attention_utils.py",
-    "updated_at": "2026-03-09T16:08:29Z"
+    "merged": true,
+    "number": 44604,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Bump torchao >=0.15 and fix quantization CI",
+    "updated_at": "2026-03-16T16:07:12Z"
   },
   {
-    "additions": 4,
-    "author": "Abdennacer-Badaoui",
-    "author_association": "MEMBER",
-    "body_excerpt": "The AMD Docker image build (`latest-pytorch-amd`) has been failing since early February due to the 6h GitHub Actions job time limit being exceeded. The root cause is that Flash Attention is cloned and compiled from the latest commit on the\u2026",
+    "additions": 17,
+    "author": "michalrzak",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? A quick fix that updates the Dockerfile to run on `arm64` systems (such as the NVIDIA Spark). The previous version of the Dockerfile fails on `arm64` systems due to `SudachiPy`, which only provides wheels for `x86_6\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44546",
-    "created_at": "2026-03-09T14:15:51Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44603",
+    "created_at": "2026-03-11T16:42:30Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44546/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44546",
+    "files_url": "https://github.com/huggingface/transformers/pull/44603/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44603",
     "labels": [],
-    "merged": true,
-    "number": 44546,
+    "merged": false,
+    "number": 44603,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix AMD Docker image build timeout by pinning Flash Attention commit",
-    "updated_at": "2026-03-09T14:37:50Z"
+    "state": "open",
+    "title": "fixed dockerfile for arm64 systems",
+    "updated_at": "2026-03-11T16:42:30Z"
   },
   {
-    "additions": 18,
+    "additions": 218,
     "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/44336",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. Follow up of https://github.com/huggingface/transformers/pull/44330 Also take the opportunity to simplify t5 and its children, because the way they compute`position_bias` was super convoluted/overc\u2026",
+    "changed_files": 61,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44544",
-    "created_at": "2026-03-09T11:38:09Z",
-    "deletions": 18,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44602",
+    "created_at": "2026-03-11T16:19:43Z",
+    "deletions": 1083,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44544/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44544",
+    "files_url": "https://github.com/huggingface/transformers/pull/44602/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44602",
     "labels": [],
     "merged": true,
-    "number": 44544,
-    "review_comments_count": 0,
+    "number": 44602,
+    "review_comments_count": 14,
     "state": "closed",
-    "title": "Fix ansi codes in loading reports when not connected to terminal",
-    "updated_at": "2026-03-09T11:52:16Z"
+    "title": "Remove `cache_position` in more models (2)",
+    "updated_at": "2026-03-12T22:38:15Z"
   },
   {
-    "additions": 154,
-    "author": "umbilnm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44521 `apply_chat_template` with `return_assistant_tokens_mask=True` returns all-zero masks when multimodal inputs (images/videos) are present. ## Root cause `generation_indices` (character-level positions of\u2026",
-    "changed_files": 2,
+    "additions": 510,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Goal is to be able to run a model with both PP and TP.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44543",
-    "created_at": "2026-03-09T10:47:05Z",
-    "deletions": 21,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44543/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44543",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44601",
+    "created_at": "2026-03-11T15:56:51Z",
+    "deletions": 2,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44601/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44601",
     "labels": [],
     "merged": false,
-    "number": 44543,
+    "number": 44601,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix assistant_masks for multimodal inputs in apply_chat_template",
-    "updated_at": "2026-03-09T10:47:05Z"
+    "title": "[Distributed] Add PP support natively",
+    "updated_at": "2026-03-12T11:53:24Z"
   },
   {
-    "additions": 3,
+    "additions": 0,
     "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Small mistake in https://github.com/huggingface/transformers/pull/44432. cc @zucchini-nlp, was it intended to remove the scaling? (I assume so since the embedding now has the saling baked in, and I guess paligemma a\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44542",
-    "created_at": "2026-03-09T10:00:29Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44600",
+    "created_at": "2026-03-11T15:40:16Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44542/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44542",
+    "files_url": "https://github.com/huggingface/transformers/pull/44600/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44600",
     "labels": [],
     "merged": true,
-    "number": 44542,
+    "number": 44600,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix backend dependency",
-    "updated_at": "2026-03-09T10:10:24Z"
+    "title": "Remove useless identity assignment",
+    "updated_at": "2026-03-12T10:21:23Z"
   },
   {
-    "additions": 1,
-    "author": "YangKai0616",
+    "additions": 3,
+    "author": "hf-security-analysis[bot]",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Per the title, an error occurs when `tp_plan` is empty due to [here](https://github.com/huggingface/transformers/blob/701628527ae1ef37473f05f5d94fac7f457a3f8f/src/transformers/core_model_loading.py#L1120): ``` [rank0]: Traceback (most rece\u2026",
+    "body_excerpt": "Update `.github/workflows/self-comment-ci.yml` workflow configuration. cc @vasqu Closes huggingface/tracking-issues#17",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44540",
-    "created_at": "2026-03-09T09:37:36Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44599",
+    "created_at": "2026-03-11T15:39:30Z",
+    "deletions": 161,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44540/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44540",
+    "files_url": "https://github.com/huggingface/transformers/pull/44599/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44599",
     "labels": [],
-    "merged": true,
-    "number": 44540,
+    "merged": false,
+    "number": 44599,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix UnboundLocalError for tp_plan_alt when tp_plan is empty",
-    "updated_at": "2026-03-11T13:42:39Z"
+    "title": "chore: update self-comment-ci.yml",
+    "updated_at": "2026-04-02T09:05:25Z"
   },
   {
-    "additions": 1333,
-    "author": "kmswin1",
-    "author_association": "NONE",
-    "body_excerpt": "A.X K1 \ubaa8\ub378 \ucd94\uac00\ud569\ub2c8\ub2e4. \uae30\uc874 inference \uc640 \ub3d9\uc77c\ud55c \uac83 \ud655\uc778\ud588\uc2b5\ub2c8\ub2e4.",
-    "changed_files": 8,
+    "additions": 1,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, updated to remove former members as well cc @yonigozlan @ydshieh",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44539",
-    "created_at": "2026-03-09T09:03:10Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44598",
+    "created_at": "2026-03-11T15:13:25Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44539/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44539",
+    "files_url": "https://github.com/huggingface/transformers/pull/44598/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44598",
     "labels": [],
-    "merged": false,
-    "number": 44539,
+    "merged": true,
+    "number": 44598,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add A.X K1",
-    "updated_at": "2026-03-09T09:04:11Z"
+    "title": "Add Yoni to run-slow workflow",
+    "updated_at": "2026-03-11T15:38:10Z"
   },
   {
-    "additions": 14,
+    "additions": 1,
     "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Follow-up of #44532: we need to change the required status check to the new added job `doc_build_status_check` added in this PR, otherwise the merge queue won't get the required (passing) status and will eventually\u2026",
+    "body_excerpt": "# What does this PR do? Our beautiful Dashboard is missing ..... damm",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44538",
-    "created_at": "2026-03-09T08:59:12Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44597",
+    "created_at": "2026-03-11T13:53:02Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44538/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44538",
+    "files_url": "https://github.com/huggingface/transformers/pull/44597/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44597",
     "labels": [],
     "merged": true,
-    "number": 44538,
+    "number": 44597,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add a new job in `build_pr_documentation.yml` (will be the new required job)",
-    "updated_at": "2026-03-09T09:12:25Z"
+    "title": "Fix CircleCI summary report not showing due to missing dependency",
+    "updated_at": "2026-03-20T07:33:38Z"
   },
   {
-    "additions": 2,
-    "author": "YangKai0616",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Per the title. Supplement skip logic for XPU in the CPU-only tp tests. Hi @IlyasMoutawwakil, please help review, thanks!",
-    "changed_files": 1,
+    "additions": 26,
+    "author": "Desel72",
+    "author_association": "NONE",
+    "body_excerpt": "Fix TypeError when loading float8 models by falling back to bfloat16 in local_torch_dtype # What does this PR do? When loading FP8 models (e.g. `Qwen/Qwen3.5-35B-A3B-FP8`) with `dtype=\"auto\"`, the auto-detected dtype from checkpoint weight\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44536",
-    "created_at": "2026-03-09T08:09:41Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44596",
+    "created_at": "2026-03-11T13:03:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44536/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44536",
-    "labels": [],
-    "merged": true,
-    "number": 44536,
+    "files_url": "https://github.com/huggingface/transformers/pull/44596/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44596",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44596,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Supplement skip logic for XPU in the CPU-only tp tests",
-    "updated_at": "2026-03-09T10:10:49Z"
+    "title": "Fix TypeError when loading float8 models by falling back to bfloat16 in local_torch_dtype",
+    "updated_at": "2026-03-11T14:00:39Z"
   },
   {
-    "additions": 53,
-    "author": "Anakintano",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Problem `Qwen2_5_VLProcessor.apply_chat_template` raises `ValueError: setting an array element with a sequence` when called with a batch of \u22652 conversations that include images under the default `padding=False` setting. **Root cause:**\u2026",
-    "changed_files": 3,
-    "cluster_id": "cluster-44514-8",
-    "cluster_ids": [
-      "cluster-44514-8"
+    "additions": 2324,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add [CHMv2 ](https://arxiv.org/abs/2603.06382) to Transformers",
+    "changed_files": 23,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44595",
+    "created_at": "2026-03-11T12:38:44Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44595/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44595",
+    "labels": [
+      "New model",
+      "run-slow"
     ],
-    "cluster_role": "canonical",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44535",
-    "created_at": "2026-03-09T07:54:58Z",
-    "deletions": 10,
+    "merged": true,
+    "number": 44595,
+    "review_comments_count": 30,
+    "state": "closed",
+    "title": "Add CHMv2",
+    "updated_at": "2026-03-11T16:00:03Z"
+  },
+  {
+    "additions": 271,
+    "author": "vimal-crypto",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What this PR does This PR brings `ObjectDetectionPipeline` in line with its sister pipelines (`ZeroShotObjectDetectionPipeline`, `ImageClassificationPipeline`) by adding four enhancements to the postprocessing stage. ### Changes **1. Sc\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44594",
+    "created_at": "2026-03-11T12:37:46Z",
+    "deletions": 40,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44535/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44535",
+    "files_url": "https://github.com/huggingface/transformers/pull/44594/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44594",
     "labels": [],
     "merged": false,
-    "number": 44535,
+    "number": 44594,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix crash in Qwen2_5_VLProcessor when using batched input with padding=False",
-    "updated_at": "2026-03-09T12:44:00Z"
+    "title": "[Pipeline] Add top_k, label filtering, box_format and score sorting to ObjectDetectionPipeline",
+    "updated_at": "2026-03-11T12:37:46Z"
   },
   {
-    "additions": 0,
-    "author": "stargazerwh",
-    "author_association": "NONE",
-    "body_excerpt": "## Description The 'transformers run' command was removed in Transformers v5, but the documentation still contained references in 62 model documentation files. This PR removes all `<hfoption id='transformers CLI'>` and `<hfoption id='trans\u2026",
-    "changed_files": 62,
-    "cluster_id": "cluster-43827-9",
-    "cluster_ids": [
-      "cluster-43827-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44533",
-    "created_at": "2026-03-09T01:39:32Z",
-    "deletions": 439,
+    "additions": 15,
+    "author": "BenjaminBossan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Multiple PEFT tests are failing due to recent changes in transformers. - hf_device_map attribute may not exist in some cases - respect inference_mode in load_adapter - new model loading requires changes for bnb (SCB\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44592",
+    "created_at": "2026-03-11T10:41:51Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44533/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44533",
+    "files_url": "https://github.com/huggingface/transformers/pull/44592/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44592",
     "labels": [],
-    "merged": false,
-    "number": 44533,
+    "merged": true,
+    "number": 44592,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: Remove references to removed 'transformers run' CLI command",
-    "updated_at": "2026-03-09T02:33:08Z"
+    "title": "FIX Multiple PEFT errors after v5 transition",
+    "updated_at": "2026-03-11T12:24:05Z"
   },
   {
-    "additions": 9,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? So we can use `Require Merge Queue` functionoality",
+    "additions": 60,
+    "author": "moktamd",
+    "author_association": "NONE",
+    "body_excerpt": "Adds `_apply_mps_fixes` in `sdpa_attention.py` to handle two upstream PyTorch MPS bugs: 1. **pytorch/pytorch#176767** (fixed in PyTorch 2.12): pads value tensor when `v_head_dim != q_head_dim` to avoid corrupted output. Affects DeepSeek mo\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44532",
-    "created_at": "2026-03-08T20:34:05Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44591",
+    "created_at": "2026-03-11T10:32:26Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44532/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44532",
+    "files_url": "https://github.com/huggingface/transformers/pull/44591/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44591",
     "labels": [],
-    "merged": true,
-    "number": 44532,
+    "merged": false,
+    "number": 44591,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update `build_pr_documentation` workflow for `merge_group` event",
-    "updated_at": "2026-03-08T20:42:57Z"
+    "title": "Add MPS SDPA workarounds for value head dim and bidirectional attention",
+    "updated_at": "2026-03-11T13:37:15Z"
   },
   {
-    "additions": 16,
-    "author": "s-zx",
+    "additions": 2,
+    "author": "pranay-3108",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes `Qwen2_5_VLProcessor.apply_chat_template` crashing with a `ValueError` when called with a batch of conversations with different prompt lengths and `padding=False` (the default). ### Root cause In the `mm_toke\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44514-8",
-    "cluster_ids": [
-      "cluster-44514-8"
-    ],
-    "cluster_role": "member",
+    "body_excerpt": "Fixes incorrect documentation for `position_ids` in `masking_utils.py`. The docstring previously described `position_ids` as `torch.Tensor`. This PR updates it to `torch.LongTensor` and aligns the description with the standard wording used\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44531",
-    "created_at": "2026-03-08T19:38:00Z",
-    "deletions": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44590",
+    "created_at": "2026-03-11T05:13:57Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44531/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44531",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44590/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44590",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44531,
+    "number": 44590,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Qwen2_5_VLProcessor.apply_chat_template crash on unpadded batched input",
-    "updated_at": "2026-03-09T13:14:02Z"
+    "title": "Fix incorrect docstring for position_ids",
+    "updated_at": "2026-03-11T21:08:42Z"
   },
   {
-    "additions": 3,
-    "author": "ydshieh2",
+    "additions": 1,
+    "author": "jiqing-feng",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Merge queue is enabled, but we need to see if it works well with CircleCI",
+    "body_excerpt": "Fixes Llama4 model loading under BitsAndBytes (BNB) quantization mode. Router quantized incorrectly causes shape mismatch: Llama4Router inherits from nn.Linear, so BNB quantizes its weight into a packed format. However, super().forward() c\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44529",
-    "created_at": "2026-03-08T18:12:54Z",
-    "deletions": 0,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44588",
+    "created_at": "2026-03-11T01:42:33Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44529/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44529",
+    "files_url": "https://github.com/huggingface/transformers/pull/44588/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44588",
     "labels": [],
     "merged": true,
-    "number": 44529,
-    "review_comments_count": 0,
+    "number": 44588,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "test merge queue 1",
-    "updated_at": "2026-03-09T21:01:09Z"
+    "title": "Fix llama4 bnb mode",
+    "updated_at": "2026-03-27T14:19:14Z"
   },
   {
-    "additions": 3,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Merge queue is enabled, but we need to see if it works well with CircleCI",
-    "changed_files": 1,
+    "additions": 32,
+    "author": "kmbhattt-aws",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44528",
-    "created_at": "2026-03-08T17:54:32Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44528/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44528",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44587",
+    "created_at": "2026-03-11T01:01:18Z",
+    "deletions": 20,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44587/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44587",
     "labels": [],
     "merged": false,
-    "number": 44528,
+    "number": 44587,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "test merge queue 1",
-    "updated_at": "2026-03-09T20:39:15Z"
+    "state": "open",
+    "title": "Fix: Handling fused qkv result tensor slicing for tp sharded qkv weights",
+    "updated_at": "2026-03-12T21:31:29Z"
   },
   {
-    "additions": 3,
-    "author": "Sai-Suraj-27",
+    "additions": 91,
+    "author": "mvanhorn",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes these failing [MusicgenStereoIntegrationTests](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500451700#step:14:7870) ## Before submitting - [ ] This PR fixes a typo or improves the\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Decouples router logits collection from output visibility in Mixtral's `ForCausalLM`. Previously, `output_router_logits=False` (the default) prevented `aux_loss` from being computed, meaning load balancing was silen\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44527",
-    "created_at": "2026-03-08T14:26:02Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44586",
+    "created_at": "2026-03-11T00:24:07Z",
+    "deletions": 39,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44527/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44527",
+    "files_url": "https://github.com/huggingface/transformers/pull/44586/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44586",
     "labels": [],
-    "merged": true,
-    "number": 44527,
+    "merged": false,
+    "number": 44586,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failing `MusicgenStereo` integration tests",
-    "updated_at": "2026-03-10T12:28:39Z"
+    "title": "Fix Mixtral aux_loss not computed when output_router_logits=False",
+    "updated_at": "2026-03-11T14:31:21Z"
   },
   {
-    "additions": 90,
-    "author": "JoursBleu",
+    "additions": 10,
+    "author": "mvanhorn",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Add GGUF loading support for MiniMax-M2.1 (456B MoE) model. MiniMax-M2.1 is a large Mixture-of-Experts model with 456B total parameters (45.9B active), 256 experts and 8 experts per token. This PR enables loading it\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Passes `eps=config.rms_norm_eps` to both `q_a_layernorm` and `kv_a_layernorm` in the DeepseekV3 MLA attention module. Without this, these layernorms default to `eps=1e-5` instead of the config value (`1e-6`), causin\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44526",
-    "created_at": "2026-03-08T09:57:38Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44585",
+    "created_at": "2026-03-11T00:20:54Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44526/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44526",
+    "files_url": "https://github.com/huggingface/transformers/pull/44585/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44585",
+    "labels": [],
+    "merged": false,
+    "number": 44585,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix missing rms_norm_eps in DeepseekV3 MLA layernorms",
+    "updated_at": "2026-03-12T14:39:12Z"
+  },
+  {
+    "additions": 1,
+    "author": "mvanhorn",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes an off-by-one error in `decode_spans()` in the document question answering pipeline that causes a `ValueError: kth(=N) out of bounds` crash when `len(scores_flat) == topk`. The boundary check on line 97 uses `\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44584",
+    "created_at": "2026-03-10T23:52:51Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44584/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44584",
     "labels": [],
     "merged": true,
-    "number": 44526,
-    "review_comments_count": 1,
+    "number": 44584,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add GGUF support for MiniMax-M2.1 model",
-    "updated_at": "2026-03-18T14:39:20Z"
+    "title": "Fix off-by-one in decode_spans boundary check",
+    "updated_at": "2026-03-12T13:22:10Z"
   },
   {
-    "additions": 1,
-    "author": "jnMetaCode",
+    "additions": 45,
+    "author": "wilnn",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes a `KeyError` crash in `_parse_type_hint` in `chat_template_utils.py` (line 117). When processing Union types, the code accesses `subtype[\"type\"]` without checking the key exists. `_get_json_schema_type(Any)` returns `{}` (\u2026",
-    "changed_files": 1,
+    "body_excerpt": "\u2026kpoint when `save_strategy` is `best` # What does this PR do? fix load_best_model_checkpoint_at_end do not load the best model checkpoint at the end when `save_strategy` is `\"best\"` Fixes # (issue) fix load_best_model_checkpoint_at_end do\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44525",
-    "created_at": "2026-03-08T09:21:27Z",
-    "deletions": 1,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44583",
+    "created_at": "2026-03-10T22:37:36Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44525/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44525",
+    "files_url": "https://github.com/huggingface/transformers/pull/44583/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44583",
     "labels": [],
     "merged": true,
-    "number": 44525,
-    "review_comments_count": 0,
+    "number": 44583,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix KeyError in _parse_type_hint when Union contains Any",
-    "updated_at": "2026-03-09T13:43:23Z"
+    "title": "fix load_best_model_checkpoint_at_end do not load the best model chec\u2026",
+    "updated_at": "2026-03-24T15:42:26Z"
   },
   {
-    "additions": 1,
-    "author": "jnMetaCode",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes a bug in `AssistantTracker.is_active()` in `chat_template_utils.py`. After activation via `activate_tracker()`, `_rendered_blocks` and `_generation_indices` are set to list arguments which may be empty `[]`. The `is_active\u2026",
+    "additions": 3,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix an issue introduced in #42564 . The refactor embedded raw image tokens instead of BPE tokens, causing the model to output gibberish. This fix adds back the image tokens to BPE tokens conversion before embedding.\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44524",
-    "created_at": "2026-03-08T09:21:25Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44582",
+    "created_at": "2026-03-10T21:00:30Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44524/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44524",
+    "files_url": "https://github.com/huggingface/transformers/pull/44582/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44582",
     "labels": [],
     "merged": true,
-    "number": 44524,
+    "number": 44582,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AssistantTracker.is_active() returning False after activation with empty lists",
-    "updated_at": "2026-03-09T13:36:19Z"
+    "title": "Fix missing BPE token conversion step in Chameleon",
+    "updated_at": "2026-03-11T11:26:49Z"
   },
   {
-    "additions": 2,
-    "author": "jnMetaCode",
+    "additions": 9,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes two small bugs in `load_sharded_checkpoint` in `trainer_utils.py`: **Bug 1 \u2014 Copy-paste error in error message (line 1108):** When reporting unexpected keys, the error message incorrectly says \"Missing key(s)\" instead of \"\u2026",
-    "changed_files": 1,
+    "body_excerpt": "### What does this PR do? The following issues were identified and fixed in this PR: \u2192 **Llama-4 Vision:** [freqs_ci is stored as a plain attr](https://github.com/huggingface/transformers/blob/153894c013/src/transformers/models/llama4/mode\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44523",
-    "created_at": "2026-03-08T09:21:22Z",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44581",
+    "created_at": "2026-03-10T19:33:51Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44523/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44523",
+    "files_url": "https://github.com/huggingface/transformers/pull/44581/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44581",
     "labels": [],
     "merged": true,
-    "number": 44523,
-    "review_comments_count": 0,
+    "number": 44581,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix error message label and docstring default in load_sharded_checkpoint",
-    "updated_at": "2026-03-10T15:48:41Z"
+    "title": "fix(models, testing): Fix Llama4 vision rotary meta tensor initialization and MyT5 get_tokenizer signature",
+    "updated_at": "2026-03-13T16:13:09Z"
   },
   {
-    "additions": 41,
-    "author": "nakigami",
+    "additions": 16,
+    "author": "rabbierabbie",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? This PR introduces initial unit test coverage for the `transformers-cli` tool, specifically focusing on diagnostic and model utility commands. Currently, these CLI entry points lack automated tests. These new tests\u2026",
+    "body_excerpt": "This PR clarifies references to the **Transformers library** in the README. While reading the documentation as a new user, I initially found the term **\"Transformers\"** ambiguous because it could refer either to the **Transformer architect\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44520",
-    "created_at": "2026-03-08T01:30:39Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44580",
+    "created_at": "2026-03-10T18:21:55Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44520/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44520",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44580/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44580",
+    "labels": [],
     "merged": false,
-    "number": 44520,
+    "number": 44580,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "test(cli): add unit tests for env and model utility commands",
-    "updated_at": "2026-03-09T13:19:15Z"
+    "title": "Clarify references to the Transformers library in README",
+    "updated_at": "2026-03-11T13:24:58Z"
   },
   {
-    "additions": 3,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes these failing [MarianIntegrationTests](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500458014#step:14:6186) <img width=\"2378\" height=\"657\" alt=\"image\" src=\"https://github.com/user\u2026",
+    "additions": 1,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? We need to fetch the specific commit (the so called merge commit created by Github itself)",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44519",
-    "created_at": "2026-03-07T19:53:23Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44579",
+    "created_at": "2026-03-10T16:58:09Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44519/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44519",
+    "files_url": "https://github.com/huggingface/transformers/pull/44579/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44579",
     "labels": [],
     "merged": true,
-    "number": 44519,
+    "number": 44579,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failing `MarianIntegrationTests`",
-    "updated_at": "2026-03-09T14:11:12Z"
+    "title": "Fix PR comment CI for quantization job",
+    "updated_at": "2026-03-10T17:07:11Z"
   },
   {
-    "additions": 12,
-    "author": "KartikPawade",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44514 `Qwen2_5_VLProcessor.__call__` crashed with a `ValueError` when processing a batch of conversations with different lengths and `padding=False` (the default). **Root cause:** The `mm_token_type_ids` blo\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44514-8",
-    "cluster_ids": [
-      "cluster-44514-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44518",
-    "created_at": "2026-03-07T19:22:40Z",
-    "deletions": 10,
+    "additions": 449,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "# Summary This PR adds three attributes to the compile config, to have granularity over how varlen (handles mixed prefil and decode batches) and decode (only decode batches) are compiled. We want to have this kind of granularity because va\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44578",
+    "created_at": "2026-03-10T16:31:20Z",
+    "deletions": 121,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44518/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44518",
+    "files_url": "https://github.com/huggingface/transformers/pull/44578/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44578",
     "labels": [],
-    "merged": false,
-    "number": 44518,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44578,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "fix: Qwen2_5_VLProcessor crashes on batched input when padding=False \u2026",
-    "updated_at": "2026-03-10T18:57:10Z"
+    "title": "[CB] Better parametrization for compile",
+    "updated_at": "2026-03-19T11:50:08Z"
   },
   {
-    "additions": 12637,
-    "author": "ShahVandit",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds Qwen3-TTS, a series of text-to-speech models by the Qwen team (Alibaba Group), to Transformers. **Architecture:** - `Qwen3TTSForConditionalGeneration` \u2014 text to multi-codebook speech codes (talker) - `Qwen3TTS\u2026",
-    "changed_files": 16,
+    "additions": 5,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "- Fix quantizer_aqlm.py to use renamed modules_to_not_convert parameter instead of removed linear_weights_not_to_quantize - Update test to match new function signature: no tuple return, module names instead of weight names",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44517",
-    "created_at": "2026-03-07T18:48:04Z",
-    "deletions": 24,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44577",
+    "created_at": "2026-03-10T15:57:36Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44517/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44517",
-    "labels": [
-      "New model",
-      "Audio"
-    ],
-    "merged": false,
-    "number": 44517,
-    "review_comments_count": 19,
-    "state": "open",
-    "title": "Add qwen3 tts",
-    "updated_at": "2026-04-13T07:53:06Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/44577/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44577",
+    "labels": [],
+    "merged": true,
+    "number": 44577,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: AQLM quantizer to match updated replace_with_aqlm_linear signature",
+    "updated_at": "2026-03-10T17:48:00Z"
   },
   {
-    "additions": 65,
-    "author": "JasonCZMeng",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fix `Qwen2_5_VLProcessor.apply_chat_template` crashing with `ValueError` when called with batched inputs of different sequence lengths (ragged lists) and `padding=False` (the default). Fixes #44514 ## Root Cause The `mm_token_ty\u2026",
-    "changed_files": 3,
-    "cluster_id": "cluster-44514-8",
-    "cluster_ids": [
-      "cluster-44514-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44516",
-    "created_at": "2026-03-07T18:33:40Z",
-    "deletions": 10,
+    "additions": 16,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44387. This PR disable async loading when we want to quantize the model. it is actually faster than doing a semaphore. If a quantizer happens to quantize fast\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44576",
+    "created_at": "2026-03-10T15:07:01Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44516/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44516",
+    "files_url": "https://github.com/huggingface/transformers/pull/44576/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44576",
     "labels": [],
-    "merged": false,
-    "number": 44516,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44576,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix(qwen2_5_vl): handle ragged batched input in apply_chat_template",
-    "updated_at": "2026-03-09T13:14:22Z"
+    "title": "Disable async loading when quantizing on the fly",
+    "updated_at": "2026-03-16T16:36:42Z"
   },
   {
-    "additions": 1,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes this failing [GPTNeoModelLanguageGenerationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500455894#step:14:1483) <img width=\"2363\" height=\"239\" alt=\"image\" src=\"https://githu\u2026",
+    "additions": 13,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Some parameters in Tapas are initialized in `__init__()` and not reinitialized in `_init_weights()`, which means that if the model is created on the `meta` device, those parameters do not get a weight initialization. This causes a crash la\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44515",
-    "created_at": "2026-03-07T18:16:35Z",
-    "deletions": 1,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44575",
+    "created_at": "2026-03-10T14:42:40Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44515/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44515",
+    "files_url": "https://github.com/huggingface/transformers/pull/44575/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44575",
     "labels": [],
     "merged": true,
-    "number": 44515,
+    "number": 44575,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failing `GPTNeoModelLanguageGenerationTest`",
-    "updated_at": "2026-03-09T14:11:21Z"
+    "title": "Correct Tapas initialization",
+    "updated_at": "2026-03-10T15:14:37Z"
   },
   {
-    "additions": 1,
-    "author": "math-hiyoko",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 69,
-    "cluster_id": "cluster-43827-9",
-    "cluster_ids": [
-      "cluster-43827-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44513",
-    "created_at": "2026-03-07T16:11:55Z",
-    "deletions": 492,
+    "additions": 33,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes #44448",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44571",
+    "created_at": "2026-03-10T14:28:22Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44513/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44513",
+    "files_url": "https://github.com/huggingface/transformers/pull/44571/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44571",
     "labels": [],
     "merged": true,
-    "number": 44513,
+    "number": 44571,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: Remove references to transformers run command",
-    "updated_at": "2026-03-09T15:37:16Z"
+    "title": "Fix pegasus conversion",
+    "updated_at": "2026-03-18T09:55:00Z"
   },
   {
-    "additions": 4,
-    "author": "04cb",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44355. The inspect.getsource() call raises TypeError when running compiled Python files with Cython-compiled functions. Added try-except block to gracefully handle this case by returning a default indentation level of 4.",
-    "changed_files": 1,
+    "additions": 30,
+    "author": "umbilnm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? In transformers v5, `DebertaV2Tokenizer` was rewritten to use `TokenizersBackend`, but the `post_processor` responsible for adding `[CLS]`/`[SEP]` tokens was never set. This causes `add_special_tokens=True` to silen\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44511",
-    "created_at": "2026-03-07T05:36:25Z",
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44570",
+    "created_at": "2026-03-10T13:37:06Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44511/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44511",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44511,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix get_docstring_indentation_level to handle compiled functions",
-    "updated_at": "2026-03-09T13:10:06Z"
-  },
-  {
-    "additions": 3,
-    "author": "math-hiyoko",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 27,
-    "cluster_id": "cluster-43827-9",
-    "cluster_ids": [
-      "cluster-43827-9"
-    ],
-    "cluster_role": "canonical",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44510",
-    "created_at": "2026-03-06T23:37:51Z",
-    "deletions": 358,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44510/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44510",
+    "files_url": "https://github.com/huggingface/transformers/pull/44570/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44570",
     "labels": [],
     "merged": true,
-    "number": 44510,
+    "number": 44570,
     "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix: Remove references to `text2text-generation`, `summarization` and `translation` pipeline tasks",
-    "updated_at": "2026-03-10T00:39:30Z"
+    "title": "Fix missing post_processor in DebertaV2Tokenizer causing no special t\u2026",
+    "updated_at": "2026-03-24T09:40:44Z"
   },
   {
-    "additions": 8,
-    "author": "KartikPawade",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Older OwlViT checkpoints stored `position_ids` as buffers in the text and vision embedding modules. These tensors are simple integer ranges (0 \u2192 max sequence length) and are now recomputed dynamically during initial\u2026",
-    "changed_files": 2,
+    "additions": 267,
+    "author": "aashay-sarvam",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds native support for the `sarvam_mla` model type (`sarvamai/sarvam-105b`) to HuggingFace Transformers using the **modular pattern**, inheriting from DeepSeek V3. ### Model Architecture SarvamMLA is a **105B para\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44508",
-    "created_at": "2026-03-06T18:49:59Z",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44569",
+    "created_at": "2026-03-10T11:55:01Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44508/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44508",
+    "files_url": "https://github.com/huggingface/transformers/pull/44569/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44569",
     "labels": [],
-    "merged": true,
-    "number": 44508,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix unexpected `position_ids` keys when loading OwlViT models",
-    "updated_at": "2026-03-18T18:30:48Z"
+    "merged": false,
+    "number": 44569,
+    "review_comments_count": 20,
+    "state": "open",
+    "title": "Add SarvamMLA model (sarvamai/sarvam-105b)",
+    "updated_at": "2026-03-18T13:58:18Z"
   },
   {
-    "additions": 4,
-    "author": "0xDELUXA",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? `torch.distributed.fsdp` is not available in all PyTorch builds (for example, Windows ROCm). Importing it unconditionally at the top level causes an immediate crash with: ``` ModuleNotFoundError: No module named 'to\u2026",
+    "additions": 2,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Currently, when using Apertus (or rather, it's `XIELUActivation`), and you don't have `xielu` installed, then you'll fall to this `except:` https://github.com/huggingface/transformers/blob/5a098a1e01034095f037c8a37f\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44507",
-    "created_at": "2026-03-06T18:03:49Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44567",
+    "created_at": "2026-03-10T11:39:20Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44507/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44507",
+    "files_url": "https://github.com/huggingface/transformers/pull/44567/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44567",
     "labels": [],
     "merged": true,
-    "number": 44507,
+    "number": 44567,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: Conditionally import `torch.distributed.fsdp` in `trainer_seq2seq.py`",
-    "updated_at": "2026-03-13T10:17:56Z"
+    "title": "[`fix`] Prevent crash with Apertus without xielu installed",
+    "updated_at": "2026-03-10T13:24:11Z"
   },
   {
-    "additions": 1,
-    "author": "michaelbenayoun",
+    "additions": 181,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The current implementation does not work with the `mps` device and TP. ## Example script script.py ``` import os os.environ[\"PYTORCH_ENABLE_MPS_FALLBACK\"] = \"1\" import torch from transformers import AutoModelForCaus\u2026",
-    "changed_files": 1,
+    "body_excerpt": "This patch extends `ty` check to `src/transformers/cli` Based on https://github.com/huggingface/transformers/pull/44412",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44506",
-    "created_at": "2026-03-06T18:03:33Z",
-    "deletions": 1,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44566",
+    "created_at": "2026-03-10T10:40:13Z",
+    "deletions": 86,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44506/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44506",
+    "files_url": "https://github.com/huggingface/transformers/pull/44566/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44566",
     "labels": [],
-    "merged": true,
-    "number": 44506,
-    "review_comments_count": 0,
+    "merged": false,
+    "number": 44566,
+    "review_comments_count": 21,
     "state": "closed",
-    "title": "Tensor Parallelism and `mps` device",
-    "updated_at": "2026-03-11T15:16:49Z"
+    "title": "chore(typing): extend typing to `src/transformers/cli` ",
+    "updated_at": "2026-04-01T16:05:57Z"
   },
   {
-    "additions": 16,
-    "author": "kushalkkb",
-    "author_association": "NONE",
-    "body_excerpt": "This PR improves error handling in the load_vocab function. Changes: - Added validation to ensure vocab_file is a string path - Added check for file existence - Raised clearer FileNotFoundError when vocabulary file is missing This improves\u2026",
+    "additions": 36,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. The unused memory is taken into account too late, which leads to different device_map for the same hardware and models, and even random cuda OOM!! Basically, the max memory needs to be adjusted BEF\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44505",
-    "created_at": "2026-03-06T17:47:37Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44565",
+    "created_at": "2026-03-10T10:31:10Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44505/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44505",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44505,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44565/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44565",
+    "labels": [],
+    "merged": true,
+    "number": 44565,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Improve error handling in load_vocab for invalid vocabulary path",
-    "updated_at": "2026-03-10T04:14:31Z"
+    "title": "[device_map] Fix device_map computation by correctly adjusting memory available",
+    "updated_at": "2026-03-10T17:16:01Z"
   },
   {
-    "additions": 13,
-    "author": "kushalkkb",
-    "author_association": "NONE",
-    "body_excerpt": "This PR improves error handling in the load_vocab function. Changes: - Added validation to ensure vocab_file is a string path - Added check for file existence - Raised clearer FileNotFoundError when vocabulary file is missing This improves\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44504",
-    "created_at": "2026-03-06T17:24:10Z",
-    "deletions": 0,
+    "additions": 2,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes #44360",
+    "changed_files": 2,
+    "cluster_id": "cluster-44360-6",
+    "cluster_ids": [
+      "cluster-44360-6"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44564",
+    "created_at": "2026-03-10T10:14:21Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44504/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44504",
+    "files_url": "https://github.com/huggingface/transformers/pull/44564/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44564",
     "labels": [],
-    "merged": false,
-    "number": 44504,
+    "merged": true,
+    "number": 44564,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Improve error handling in load_vocab for invalid vocabulary path",
-    "updated_at": "2026-03-06T17:46:17Z"
+    "title": "Fix glm dsa",
+    "updated_at": "2026-03-19T15:13:36Z"
   },
   {
-    "additions": 8,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Moonshine:** In [MoonshineEncoder.forward](https://github.com/huggingface/transformers/blob/main/src/transformers/models/moonshine/modular_moon\u2026",
-    "changed_files": 4,
+    "additions": 165,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Split out `mm_token_type_id` creation to a separate utility and just call it in VLMs. Also make sure that `mm_token_type_id` can be created even when `padding=False` and the inputs are of different length. As long a\u2026",
+    "changed_files": 37,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44503",
-    "created_at": "2026-03-06T17:08:00Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44563",
+    "created_at": "2026-03-10T10:13:29Z",
+    "deletions": 267,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44503/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44503",
+    "files_url": "https://github.com/huggingface/transformers/pull/44563/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44563",
     "labels": [],
     "merged": true,
-    "number": 44503,
-    "review_comments_count": 0,
+    "number": 44563,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "fix(testing): Fix MoonshineEncoder UnboundLocalError and Florence2VisionBackbone dtype mismatch",
-    "updated_at": "2026-03-09T18:06:17Z"
+    "title": "Allow `mm_token_type` be non-padded lists ",
+    "updated_at": "2026-03-25T11:33:46Z"
   },
   {
-    "additions": 1,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Introduced in https://github.com/huggingface/transformers/pull/44381, not sure why the CI passed",
+    "additions": 54,
+    "author": "SamArun28",
+    "author_association": "NONE",
+    "body_excerpt": "#Standardizing the BERT model card as part of issue #36979 Changes made: - Added friendly description of BERT - Added Pipeline and AutoModel code examples - Added Notes section with helpful tips - Added Resources section with links @stevhl\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44502",
-    "created_at": "2026-03-06T17:03:17Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44562",
+    "created_at": "2026-03-10T09:58:19Z",
+    "deletions": 111,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44502/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44502",
+    "files_url": "https://github.com/huggingface/transformers/pull/44562/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44562",
     "labels": [],
-    "merged": true,
-    "number": 44502,
+    "merged": false,
+    "number": 44562,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix type checker",
-    "updated_at": "2026-03-06T17:09:37Z"
+    "title": "docs: standardize BERT model card",
+    "updated_at": "2026-03-10T16:00:43Z"
+  },
+  {
+    "additions": 368,
+    "author": "rain-1",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "> Adds support for the legacy text completions endpoint, which accepts a freeform text prompt (no chat template) and returns generated text in choices[].text. Supports both streaming and non-streaming modes, suffix for fill-in-the-middle i\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44558",
+    "created_at": "2026-03-10T07:09:07Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44558/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44558",
+    "labels": [],
+    "merged": false,
+    "number": 44558,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Add /v1/completions endpoint (OpenAI legacy completions API) to `transformers serve`",
+    "updated_at": "2026-04-06T09:23:32Z"
   },
   {
     "additions": 1,
-    "author": "frogNotToad",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Changes the word \"maximize\" to \"minimize\" in the docs Fixes #44492 ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). ## Who can review? Anyon\u2026",
+    "author": "black-yt",
+    "author_association": "NONE",
+    "body_excerpt": "This PR fixes a runtime `TypeError` encountered during model initialization when using Qwen3.5 configurations with recent `transformers` versions. The error occurs in `modeling_rope_utils.py` during RoPE parameter validation: ``` TypeError\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44501",
-    "created_at": "2026-03-06T16:58:14Z",
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44555",
+    "created_at": "2026-03-10T01:52:18Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44501/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44501",
+    "files_url": "https://github.com/huggingface/transformers/pull/44555/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44555",
     "labels": [],
-    "merged": true,
-    "number": 44501,
+    "merged": false,
+    "number": 44555,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fixed typo in docs/source/en/kv_cache.md",
-    "updated_at": "2026-03-06T20:05:36Z"
+    "title": "Fix TypeError in RoPE validation when ignore_keys_at_rope_validation is a list",
+    "updated_at": "2026-03-28T10:04:35Z"
   },
   {
-    "additions": 18,
-    "author": "tarekziade",
+    "additions": 233,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - Do proper type check in case jax is installed. - Make sure older torch versions don't raise typing issues",
-    "changed_files": 4,
+    "body_excerpt": "As per title, wip and really unsure if we really want this",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44500",
-    "created_at": "2026-03-06T16:56:12Z",
-    "deletions": 16,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44500/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44500",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44553",
+    "created_at": "2026-03-10T01:04:49Z",
+    "deletions": 263,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44553/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44553",
     "labels": [],
-    "merged": true,
-    "number": 44500,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Follow-up typing checking fixes",
-    "updated_at": "2026-03-09T10:47:31Z"
+    "merged": false,
+    "number": 44553,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "[`FA`] Refactor FA CB kwargs",
+    "updated_at": "2026-03-17T09:14:21Z"
   },
   {
-    "additions": 11,
-    "author": "michaelbenayoun",
+    "additions": 0,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR is just a small cleanup. The `TensorParallelLayer` class defines `_prepare_input_fn` and `_prepare_output_fn` as static methods. But then these methods end-up being instance or static methods in the sub clas\u2026",
+    "body_excerpt": "Reverts huggingface/transformers#44529",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44499",
-    "created_at": "2026-03-06T16:46:18Z",
-    "deletions": 18,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44552",
+    "created_at": "2026-03-09T21:05:46Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44499/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44499",
+    "files_url": "https://github.com/huggingface/transformers/pull/44552/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44552",
     "labels": [],
     "merged": true,
-    "number": 44499,
+    "number": 44552,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Make `_prepare_input_fn` and `_prepare_output_fn` instance methods",
-    "updated_at": "2026-03-10T13:53:18Z"
+    "title": "Revert \"test merge queue 1\"",
+    "updated_at": "2026-03-09T21:15:55Z"
   },
   {
-    "additions": 1,
-    "author": "michaelbenayoun",
+    "additions": 12,
+    "author": "echarlaix",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add the `neuron` backend for initialization in TP.",
-    "changed_files": 1,
+    "body_excerpt": "`self.rotary_emb` is always called since https://github.com/huggingface/transformers/pull/39847 while only being initialized when `config.use_mem_rope` is True inference failing since v5 for models `config.use_mem_rope=False` ``` Attribute\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44498",
-    "created_at": "2026-03-06T16:23:18Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44551",
+    "created_at": "2026-03-09T18:15:17Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44498/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44498",
+    "files_url": "https://github.com/huggingface/transformers/pull/44551/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44551",
     "labels": [],
     "merged": true,
-    "number": 44498,
+    "number": 44551,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "feat: add neuron in tensor parallelism initialization",
-    "updated_at": "2026-03-12T18:07:52Z"
+    "title": "Fix zamba2 rotary embedding call when use_mem_rope is False",
+    "updated_at": "2026-03-10T11:43:36Z"
   },
   {
-    "additions": 43,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/44466 and avoid issues with torch `.bin` checkpoints which always contain both keys!",
-    "changed_files": 2,
+    "additions": 6,
+    "author": "himani27301",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Improved wording and grammar in the Auto Classes documentation to enhance readability without changing functionality. # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, you\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44497",
-    "created_at": "2026-03-06T16:21:14Z",
-    "deletions": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44550",
+    "created_at": "2026-03-09T16:12:59Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44497/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44497",
+    "files_url": "https://github.com/huggingface/transformers/pull/44550/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44550",
     "labels": [],
-    "merged": true,
-    "number": 44497,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[tie weights] \ud83d\udea8 If both weights are present with same weights, still tie them",
-    "updated_at": "2026-03-09T15:00:25Z"
+    "merged": false,
+    "number": 44550,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Improve clarity and grammar in Auto Classes documentation",
+    "updated_at": "2026-03-09T16:32:29Z"
   },
   {
-    "additions": 69,
-    "author": "vasqu",
+    "additions": 158,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title, WIP",
-    "changed_files": 354,
+    "body_excerpt": "# What does this PR do? The previous values are simply wrong. (which also cause problems for tiny model creation)",
+    "changed_files": 51,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44495",
-    "created_at": "2026-03-06T13:57:04Z",
-    "deletions": 521,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44495/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44495",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44549",
+    "created_at": "2026-03-09T15:44:36Z",
+    "deletions": 126,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44549/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44549",
     "labels": [],
-    "merged": false,
-    "number": 44495,
+    "merged": true,
+    "number": 44549,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[`Gradient Ckpting`] Remove unnecessary attribute definitions",
-    "updated_at": "2026-03-06T13:58:22Z"
+    "state": "closed",
+    "title": "Fix wrong (non-existing) checkpoints",
+    "updated_at": "2026-03-11T15:16:56Z"
   },
   {
-    "additions": 13,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "- updates `ty` to `0.2.0` - pinned regex package (older versions did not have typing stubs) - fixed a couple of typing failures that went through via other parallel branches",
-    "changed_files": 6,
+    "additions": 12,
+    "author": "mvanhorn",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #43935 ## Summary - Added `eval_on_end` argument to `TrainingArguments` (default: `False`) - Added conditional evaluation at the end of training in `Trainer.train()`, symmetric to `eval_on_start` - Implementation mirrors the existing\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44494",
-    "created_at": "2026-03-06T12:57:25Z",
-    "deletions": 11,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44548",
+    "created_at": "2026-03-09T15:03:27Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44494/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44494",
+    "files_url": "https://github.com/huggingface/transformers/pull/44548/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44548",
     "labels": [],
-    "merged": true,
-    "number": 44494,
-    "review_comments_count": 3,
+    "merged": false,
+    "number": 44548,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Update `ty` to 0.0.20",
-    "updated_at": "2026-03-06T13:30:25Z"
+    "title": "Add eval_on_end flag to Trainer",
+    "updated_at": "2026-03-09T19:31:59Z"
   },
   {
-    "additions": 439,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Since I removed some folders (fsdp, deepspeed) related to training, I need to modify the workflows !",
-    "changed_files": 18,
+    "additions": 2,
+    "author": "mvanhorn",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #44373 ## Summary - Corrected the docstring for `position_ids` parameter in `prepare_fa_kwargs_from_position_ids` and `_prepare_from_posids` which incorrectly described attention mask semantics (\"Boolean or int tensor... 1 means vali\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44491",
-    "created_at": "2026-03-06T11:15:42Z",
-    "deletions": 647,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44547",
+    "created_at": "2026-03-09T14:59:27Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44491/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44491",
+    "files_url": "https://github.com/huggingface/transformers/pull/44547/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44547",
     "labels": [],
-    "merged": true,
-    "number": 44491,
+    "merged": false,
+    "number": 44547,
     "review_comments_count": 3,
-    "state": "closed",
-    "title": "Fix training ci and clean some tests",
-    "updated_at": "2026-03-11T16:27:57Z"
+    "state": "open",
+    "title": "Fix position_ids docstring in modeling_flash_attention_utils.py",
+    "updated_at": "2026-03-09T16:08:29Z"
   },
   {
     "additions": 4,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ArthurZucker @Cyrilvallez pls help review, thx! This PR fixes failed test case: `pytest -rA tests/models/eurobert/test_modeling_eurobert.py::EuroBertModelTest::test_model_parallelism`",
-    "changed_files": 2,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "The AMD Docker image build (`latest-pytorch-amd`) has been failing since early February due to the 6h GitHub Actions job time limit being exceeded. The root cause is that Flash Attention is cloned and compiled from the latest commit on the\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44490",
-    "created_at": "2026-03-06T10:56:48Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44546",
+    "created_at": "2026-03-09T14:15:51Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44490/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44490",
+    "files_url": "https://github.com/huggingface/transformers/pull/44546/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44546",
     "labels": [],
     "merged": true,
-    "number": 44490,
+    "number": 44546,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix model parallelism bug for eurobert model",
-    "updated_at": "2026-04-01T08:25:28Z"
+    "title": "Fix AMD Docker image build timeout by pinning Flash Attention commit",
+    "updated_at": "2026-03-09T14:37:50Z"
   },
   {
-    "additions": 310,
-    "author": "tarekziade",
+    "additions": 18,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR makes `.ai` the single source of truth for agent templates and skills, and adds explicit `Makefile` targets to generate `Codex` and `Claude Code` specific artifacts. It contains a first skill aimed at properly dealing with typing e\u2026",
-    "changed_files": 7,
+    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/44336",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44489",
-    "created_at": "2026-03-06T08:42:12Z",
-    "deletions": 62,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44544",
+    "created_at": "2026-03-09T11:38:09Z",
+    "deletions": 18,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44489/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44489",
+    "files_url": "https://github.com/huggingface/transformers/pull/44544/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44544",
     "labels": [],
     "merged": true,
-    "number": 44489,
-    "review_comments_count": 2,
+    "number": 44544,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Centralize AI agent templates in `.ai`",
-    "updated_at": "2026-03-18T14:17:22Z"
+    "title": "Fix ansi codes in loading reports when not connected to terminal",
+    "updated_at": "2026-03-09T11:52:16Z"
   },
   {
-    "additions": 482,
-    "author": "abhijeet-dhumal",
+    "additions": 154,
+    "author": "umbilnm",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44486 Adds `KubeflowCallback` to enable automatic progress and metrics reporting for training jobs running on [Kubeflow Trainer](https://github.com/kubeflow/trainer). When training runs inside a Kubeflow Trai\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? Fixes #44521 `apply_chat_template` with `return_assistant_tokens_mask=True` returns all-zero masks when multimodal inputs (images/videos) are present. ## Root cause `generation_indices` (character-level positions of\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44487",
-    "created_at": "2026-03-06T08:31:30Z",
-    "deletions": 1,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44543",
+    "created_at": "2026-03-09T10:47:05Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44487/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44487",
+    "files_url": "https://github.com/huggingface/transformers/pull/44543/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44543",
     "labels": [],
-    "merged": true,
-    "number": 44487,
-    "review_comments_count": 8,
-    "state": "closed",
-    "title": "feat(integration): Add KubeflowCallback to enable automatic progress \u2026",
-    "updated_at": "2026-03-18T14:58:23Z"
+    "merged": false,
+    "number": 44543,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix assistant_masks for multimodal inputs in apply_chat_template",
+    "updated_at": "2026-03-09T10:47:05Z"
   },
   {
-    "additions": 691,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil pls help review, thx!",
-    "changed_files": 1,
+    "additions": 3,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44482",
-    "created_at": "2026-03-06T02:39:41Z",
-    "deletions": 332,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44542",
+    "created_at": "2026-03-09T10:00:29Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44482/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44482",
+    "files_url": "https://github.com/huggingface/transformers/pull/44542/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44542",
     "labels": [],
     "merged": true,
-    "number": 44482,
+    "number": 44542,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add XPU Expectations for higgs_audio_v2 tests",
-    "updated_at": "2026-04-09T02:32:48Z"
+    "title": "Fix backend dependency",
+    "updated_at": "2026-03-09T10:10:24Z"
   },
   {
-    "additions": 2353,
-    "author": "XingyuHu109",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary This PR adds native Transformers support for DeepSeek-V3.2. It introduces a new `deepseek_v32` model family so the official checkpoints resolve through the standard auto classes without `trust_remote_code`. The implementation ke\u2026",
-    "changed_files": 19,
+    "additions": 1,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Per the title, an error occurs when `tp_plan` is empty due to [here](https://github.com/huggingface/transformers/blob/701628527ae1ef37473f05f5d94fac7f457a3f8f/src/transformers/core_model_loading.py#L1120): ``` [rank0]: Traceback (most rece\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44481",
-    "created_at": "2026-03-05T21:14:38Z",
-    "deletions": 30,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44540",
+    "created_at": "2026-03-09T09:37:36Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44481/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44481",
+    "files_url": "https://github.com/huggingface/transformers/pull/44540/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44540",
     "labels": [],
-    "merged": false,
-    "number": 44481,
-    "review_comments_count": 4,
-    "state": "open",
-    "title": "Add native DeepSeek-V3.2 support",
-    "updated_at": "2026-03-12T16:02:46Z"
+    "merged": true,
+    "number": 44540,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix UnboundLocalError for tp_plan_alt when tp_plan is empty",
+    "updated_at": "2026-03-11T13:42:39Z"
   },
   {
-    "additions": 3,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? add `diffusers` to docker file for `VibeVoice` (added in PR #40546).",
-    "changed_files": 1,
+    "additions": 1333,
+    "author": "kmswin1",
+    "author_association": "NONE",
+    "body_excerpt": "A.X K1 \ubaa8\ub378 \ucd94\uac00\ud569\ub2c8\ub2e4. \uae30\uc874 inference \uc640 \ub3d9\uc77c\ud55c \uac83 \ud655\uc778\ud588\uc2b5\ub2c8\ub2e4.",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44480",
-    "created_at": "2026-03-05T20:54:07Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44539",
+    "created_at": "2026-03-09T09:03:10Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44480/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44480",
+    "files_url": "https://github.com/huggingface/transformers/pull/44539/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44539",
     "labels": [],
-    "merged": true,
-    "number": 44480,
+    "merged": false,
+    "number": 44539,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add `diffusers` to CI docker file",
-    "updated_at": "2026-03-05T21:11:17Z"
+    "title": "Add A.X K1",
+    "updated_at": "2026-03-09T09:04:11Z"
   },
   {
-    "additions": 116,
-    "author": "BenjaminBossan",
+    "additions": 14,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "Required fixes: - some code was using unordered data structures, making weight order random - adjust alpha to offset increased rank from fusion - import functions from PEFT if available See https://github.com/huggingface/peft/pull/3083.",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? Follow-up of #44532: we need to change the required status check to the new added job `doc_build_status_check` added in this PR, otherwise the merge queue won't get the required (passing) status and will eventually\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44478",
-    "created_at": "2026-03-05T17:19:31Z",
-    "deletions": 26,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44538",
+    "created_at": "2026-03-09T08:59:12Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44478/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44478",
+    "files_url": "https://github.com/huggingface/transformers/pull/44538/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44538",
     "labels": [],
     "merged": true,
-    "number": 44478,
-    "review_comments_count": 1,
+    "number": 44538,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[WIP] FIX Make Mixtral LoRA loading work",
-    "updated_at": "2026-03-11T17:44:20Z"
+    "title": "Add a new job in `build_pr_documentation.yml` (will be the new required job)",
+    "updated_at": "2026-03-09T09:12:25Z"
   },
   {
-    "additions": 1,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. It's quite a random rule to fix https://huggingface.co/fixie-ai/ultravox-v0_5-llama-3_2-1b to be honest",
+    "additions": 2,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Per the title. Supplement skip logic for XPU in the CPU-only tp tests. Hi @IlyasMoutawwakil, please help review, thanks!",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44477",
-    "created_at": "2026-03-05T16:58:29Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44536",
+    "created_at": "2026-03-09T08:09:41Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44477/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44477",
+    "files_url": "https://github.com/huggingface/transformers/pull/44536/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44536",
     "labels": [],
-    "merged": false,
-    "number": 44477,
+    "merged": true,
+    "number": 44536,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[vllm compat] Fix remote code inits",
-    "updated_at": "2026-03-11T10:34:06Z"
+    "title": "Supplement skip logic for XPU in the CPU-only tp tests",
+    "updated_at": "2026-03-09T10:10:49Z"
   },
   {
-    "additions": 4,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "I made an oversight in the fix at #43981 - I didn't realize the dim order changed for torch, so the test was still flaky for torch tensors. The fix reduced the flaky frequency a lot so I thought it had been fixed, but actually it's still t\u2026",
-    "changed_files": 1,
+    "additions": 53,
+    "author": "Anakintano",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Problem `Qwen2_5_VLProcessor.apply_chat_template` raises `ValueError: setting an array element with a sequence` when called with a batch of \u22652 conversations that include images under the default `padding=False` setting. **Root cause:**\u2026",
+    "changed_files": 3,
+    "cluster_id": "cluster-44514-8",
+    "cluster_ids": [
+      "cluster-44514-8"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44535",
+    "created_at": "2026-03-09T07:54:58Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44535/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44535",
+    "labels": [],
+    "merged": false,
+    "number": 44535,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix crash in Qwen2_5_VLProcessor when using batched input with padding=False",
+    "updated_at": "2026-03-09T12:44:00Z"
+  },
+  {
+    "additions": 0,
+    "author": "stargazerwh",
+    "author_association": "NONE",
+    "body_excerpt": "## Description The 'transformers run' command was removed in Transformers v5, but the documentation still contained references in 62 model documentation files. This PR removes all `<hfoption id='transformers CLI'>` and `<hfoption id='trans\u2026",
+    "changed_files": 62,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44476",
-    "created_at": "2026-03-05T16:39:44Z",
-    "deletions": 2,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44533",
+    "created_at": "2026-03-09T01:39:32Z",
+    "deletions": 439,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44476/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44476",
+    "files_url": "https://github.com/huggingface/transformers/pull/44533/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44533",
     "labels": [],
-    "merged": true,
-    "number": 44476,
+    "merged": false,
+    "number": 44533,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Llava tests for torch too!",
-    "updated_at": "2026-03-11T16:47:05Z"
+    "title": "docs: Remove references to removed 'transformers run' CLI command",
+    "updated_at": "2026-03-09T02:33:08Z"
   },
   {
-    "additions": 1,
-    "author": "itazap",
+    "additions": 9,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "chameleon added to MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS",
+    "body_excerpt": "# What does this PR do? So we can use `Require Merge Queue` functionoality",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44475",
-    "created_at": "2026-03-05T16:29:18Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44532",
+    "created_at": "2026-03-08T20:34:05Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44475/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44475",
+    "files_url": "https://github.com/huggingface/transformers/pull/44532/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44532",
     "labels": [],
     "merged": true,
-    "number": 44475,
+    "number": 44532,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chameleon added to MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS",
-    "updated_at": "2026-03-09T22:33:20Z"
+    "title": "Update `build_pr_documentation` workflow for `merge_group` event",
+    "updated_at": "2026-03-08T20:42:57Z"
   },
   {
-    "additions": 875,
-    "author": "JJJYmmm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix https://github.com/QwenLM/Qwen3.5/issues/58. In the latest code, Qwen3VL and Qwen3.5 use the same `get_rope_index` func of Qwen2VL. But they should be different since Qwen3VL/Qwen3.5 introduce text timestamps. T\u2026",
-    "changed_files": 9,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44474",
-    "created_at": "2026-03-05T15:46:09Z",
-    "deletions": 107,
+    "additions": 16,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes `Qwen2_5_VLProcessor.apply_chat_template` crashing with a `ValueError` when called with a batch of conversations with different prompt lengths and `padding=False` (the default). ### Root cause In the `mm_toke\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44514-8",
+    "cluster_ids": [
+      "cluster-44514-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44531",
+    "created_at": "2026-03-08T19:38:00Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44474/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44474",
+    "files_url": "https://github.com/huggingface/transformers/pull/44531/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44531",
     "labels": [],
-    "merged": true,
-    "number": 44474,
-    "review_comments_count": 10,
+    "merged": false,
+    "number": 44531,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Bugfix] fix video inference of qwen3vl and qwen3.5 series",
-    "updated_at": "2026-03-10T09:52:44Z"
+    "title": "Fix Qwen2_5_VLProcessor.apply_chat_template crash on unpadded batched input",
+    "updated_at": "2026-03-09T13:14:02Z"
   },
   {
-    "additions": 137,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? supersedes #44446 on `main`, when loading to cpu and using meta devices for non-rank0 processes, it now re-initializes weights on those processes as well as uses more CPU memory. In testing with loading llama3-8b. m\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "ydshieh2",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Merge queue is enabled, but we need to see if it works well with CircleCI",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44473",
-    "created_at": "2026-03-05T14:52:15Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44529",
+    "created_at": "2026-03-08T18:12:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44473/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44473",
+    "files_url": "https://github.com/huggingface/transformers/pull/44529/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44529",
     "labels": [],
     "merged": true,
-    "number": 44473,
-    "review_comments_count": 4,
+    "number": 44529,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix FSDP loading with meta devices",
-    "updated_at": "2026-03-09T15:46:22Z"
+    "title": "test merge queue 1",
+    "updated_at": "2026-03-09T21:01:09Z"
   },
   {
-    "additions": 13,
-    "author": "jblox26",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this fix? Running video inference with any `Qwen3VL` model raises `StopIteration` during `model.generate()`: ``` File \".../transformers/models/qwen3_vl/modeling_qwen3_vl.py\", line 1126, in get_rope_index grid_thw = next(grid_i\u2026",
+    "additions": 3,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Merge queue is enabled, but we need to see if it works well with CircleCI",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44472",
-    "created_at": "2026-03-05T14:50:06Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44528",
+    "created_at": "2026-03-08T17:54:32Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44472/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44472",
+    "files_url": "https://github.com/huggingface/transformers/pull/44528/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44528",
     "labels": [],
     "merged": false,
-    "number": 44472,
+    "number": 44528,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Qwen3VL get_rope_index StopIteration with per-frame video tokens",
-    "updated_at": "2026-03-06T15:15:58Z"
+    "title": "test merge queue 1",
+    "updated_at": "2026-03-09T20:39:15Z"
   },
   {
-    "additions": 50,
-    "author": "weiguangli-io",
+    "additions": 3,
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44466 After `.to(device)`, PyTorch's `Module._apply` may create new `Parameter` objects that no longer share storage with tied weights. This caused `remove_tied_weights_from_state_dict` to fail to detect and\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Fixes these failing [MusicgenStereoIntegrationTests](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500451700#step:14:7870) ## Before submitting - [ ] This PR fixes a typo or improves the\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44471",
-    "created_at": "2026-03-05T14:30:17Z",
-    "deletions": 0,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44527",
+    "created_at": "2026-03-08T14:26:02Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44471/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44471",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44471,
+    "files_url": "https://github.com/huggingface/transformers/pull/44527/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44527",
+    "labels": [],
+    "merged": true,
+    "number": 44527,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix tied weights serialization being device-dependent",
-    "updated_at": "2026-03-06T14:03:18Z"
+    "title": "Fix failing `MusicgenStereo` integration tests",
+    "updated_at": "2026-03-10T12:28:39Z"
   },
   {
-    "additions": 8,
-    "author": "weiguangli-io",
+    "additions": 90,
+    "author": "JoursBleu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44360 The reference `fp8_index` kernel clamps per-head q\u00b7k scores with `T.max(logits, 0)` before the weighted sum across heads ([kernel.py#L241](https://huggingface.co/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/kernel.py#L241\u2026",
+    "body_excerpt": "# What does this PR do? Add GGUF loading support for MiniMax-M2.1 (456B MoE) model. MiniMax-M2.1 is a large Mixture-of-Experts model with 456B total parameters (45.9B active), 256 experts and 8 experts per token. This PR enables loading it\u2026",
     "changed_files": 2,
-    "cluster_id": "cluster-44360-6",
-    "cluster_ids": [
-      "cluster-44360-6"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44470",
-    "created_at": "2026-03-05T14:02:05Z",
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44526",
+    "created_at": "2026-03-08T09:57:38Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44470/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44470",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44470,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44526/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44526",
+    "labels": [],
+    "merged": true,
+    "number": 44526,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add missing ReLU in GlmMoeDsaIndexer",
-    "updated_at": "2026-03-05T15:39:38Z"
+    "title": "Add GGUF support for MiniMax-M2.1 model",
+    "updated_at": "2026-03-18T14:39:20Z"
   },
   {
-    "additions": 4,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? For remote code that behave correctly with tied weights, we need to keep the same behavior as for the main lib, i.e. not remove them from tied weights (as tied weights are marked as missing to avoid inits!!)",
+    "additions": 1,
+    "author": "jnMetaCode",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes a `KeyError` crash in `_parse_type_hint` in `chat_template_utils.py` (line 117). When processing Union types, the code accesses `subtype[\"type\"]` without checking the key exists. `_get_json_schema_type(Any)` returns `{}` (\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44469",
-    "created_at": "2026-03-05T13:51:55Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44525",
+    "created_at": "2026-03-08T09:21:27Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44469/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44469",
+    "files_url": "https://github.com/huggingface/transformers/pull/44525/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44525",
     "labels": [],
     "merged": true,
-    "number": 44469,
+    "number": 44525,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[remote code/vllm] Fix incorrect tied weights",
-    "updated_at": "2026-03-05T15:07:56Z"
+    "title": "Fix KeyError in _parse_type_hint when Union contains Any",
+    "updated_at": "2026-03-09T13:43:23Z"
   },
   {
-    "additions": 13,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "Replace placeholder tokens as specified in added_tokens_decoder if we have added_tokens_decoder with specific token_ids, we need to overwrite them in spm model ! example: [UNUSED_TOKEN_146] -> <|im_start|> see internlm2: https://huggingfac\u2026",
+    "additions": 1,
+    "author": "jnMetaCode",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes a bug in `AssistantTracker.is_active()` in `chat_template_utils.py`. After activation via `activate_tracker()`, `_rendered_blocks` and `_generation_indices` are set to list arguments which may be empty `[]`. The `is_active\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44468",
-    "created_at": "2026-03-05T13:48:56Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44524",
+    "created_at": "2026-03-08T09:21:25Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44468/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44468",
+    "files_url": "https://github.com/huggingface/transformers/pull/44524/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44524",
     "labels": [],
     "merged": true,
-    "number": 44468,
+    "number": 44524,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Replace placeholder tokens as specified in added_tokens_decoder",
-    "updated_at": "2026-03-05T16:29:13Z"
+    "title": "Fix AssistantTracker.is_active() returning False after activation with empty lists",
+    "updated_at": "2026-03-09T13:36:19Z"
   },
   {
-    "additions": 346,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "Replace placeholder tokens as specified in added_tokens_decoder if we have `added_tokens_decoder` with specific token_ids, we need to overwrite them in spm model ! `example: [UNUSED_TOKEN_146] -> <|im_start|>` see internlm2: https://huggin\u2026",
-    "changed_files": 24,
+    "additions": 2,
+    "author": "jnMetaCode",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes two small bugs in `load_sharded_checkpoint` in `trainer_utils.py`: **Bug 1 \u2014 Copy-paste error in error message (line 1108):** When reporting unexpected keys, the error message incorrectly says \"Missing key(s)\" instead of \"\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44467",
-    "created_at": "2026-03-05T13:44:54Z",
-    "deletions": 204,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44523",
+    "created_at": "2026-03-08T09:21:22Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44467/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44467",
+    "files_url": "https://github.com/huggingface/transformers/pull/44523/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44523",
     "labels": [],
-    "merged": false,
-    "number": 44467,
+    "merged": true,
+    "number": 44523,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Placeholder tokens update",
-    "updated_at": "2026-03-05T13:47:28Z"
+    "state": "closed",
+    "title": "Fix error message label and docstring default in load_sharded_checkpoint",
+    "updated_at": "2026-03-10T15:48:41Z"
   },
   {
-    "additions": 20,
-    "author": "kashif",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix the loss calculation; we should calculate it on scaled targets. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with\u2026",
-    "changed_files": 2,
+    "additions": 41,
+    "author": "nakigami",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR introduces initial unit test coverage for the `transformers-cli` tool, specifically focusing on diagnostic and model utility commands. Currently, these CLI entry points lack automated tests. These new tests\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44465",
-    "created_at": "2026-03-05T12:59:23Z",
-    "deletions": 14,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44520",
+    "created_at": "2026-03-08T01:30:39Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44465/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44465",
+    "files_url": "https://github.com/huggingface/transformers/pull/44520/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44520",
     "labels": [
-      "bug"
+      "Code agent slop"
     ],
-    "merged": true,
-    "number": 44465,
+    "merged": false,
+    "number": 44520,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[timesfm2_5] fix loss scaling",
-    "updated_at": "2026-03-05T14:50:26Z"
+    "title": "test(cli): add unit tests for env and model utility commands",
+    "updated_at": "2026-03-09T13:19:15Z"
   },
   {
-    "additions": 16,
-    "author": "weiguangli-io",
+    "additions": 3,
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44462 When a model's `model_type` (e.g. `\"llama\"`) has no entry in `TOKENIZER_MAPPING_NAMES`, `AutoTokenizer.from_pretrained` falls through to loading the tokenizer class declared in `tokenizer_config.json`\u2026",
+    "body_excerpt": "# What does this PR do? Fixes these failing [MarianIntegrationTests](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500458014#step:14:6186) <img width=\"2378\" height=\"657\" alt=\"image\" src=\"https://github.com/user\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44463",
-    "created_at": "2026-03-05T12:45:57Z",
-    "deletions": 4,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44519",
+    "created_at": "2026-03-07T19:53:23Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44463/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44463",
+    "files_url": "https://github.com/huggingface/transformers/pull/44519/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44519",
     "labels": [],
-    "merged": false,
-    "number": 44463,
+    "merged": true,
+    "number": 44519,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AutoTokenizer ignoring tokenizer.json for unregistered model types",
-    "updated_at": "2026-03-07T13:50:44Z"
+    "title": "Fix failing `MarianIntegrationTests`",
+    "updated_at": "2026-03-09T14:11:12Z"
   },
   {
     "additions": 12,
-    "author": "kashif",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? for SP loss we do not have torch device mesh but rather a deepspeed only. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44461",
-    "created_at": "2026-03-05T11:39:02Z",
-    "deletions": 1,
+    "author": "KartikPawade",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44514 `Qwen2_5_VLProcessor.__call__` crashed with a `ValueError` when processing a batch of conversations with different lengths and `padding=False` (the default). **Root cause:** The `mm_token_type_ids` blo\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44514-8",
+    "cluster_ids": [
+      "cluster-44514-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44518",
+    "created_at": "2026-03-07T19:22:40Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44461/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44461",
+    "files_url": "https://github.com/huggingface/transformers/pull/44518/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44518",
     "labels": [],
-    "merged": true,
-    "number": 44461,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 44518,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Trainer] fix SP loss",
-    "updated_at": "2026-03-05T13:00:40Z"
+    "title": "fix: Qwen2_5_VLProcessor crashes on batched input when padding=False \u2026",
+    "updated_at": "2026-03-10T18:57:10Z"
   },
   {
-    "additions": 1,
-    "author": "3outeille",
-    "author_association": "MEMBER",
-    "body_excerpt": null,
-    "changed_files": 1,
+    "additions": 12637,
+    "author": "ShahVandit",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds Qwen3-TTS, a series of text-to-speech models by the Qwen team (Alibaba Group), to Transformers. **Architecture:** - `Qwen3TTSForConditionalGeneration` \u2014 text to multi-codebook speech codes (talker) - `Qwen3TTS\u2026",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44460",
-    "created_at": "2026-03-05T10:53:07Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44517",
+    "created_at": "2026-03-07T18:48:04Z",
+    "deletions": 24,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44460/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44460",
+    "files_url": "https://github.com/huggingface/transformers/pull/44517/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44517",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 44517,
+    "review_comments_count": 19,
+    "state": "open",
+    "title": "Add qwen3 tts",
+    "updated_at": "2026-04-13T07:53:06Z"
+  },
+  {
+    "additions": 65,
+    "author": "JasonCZMeng",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fix `Qwen2_5_VLProcessor.apply_chat_template` crashing with `ValueError` when called with batched inputs of different sequence lengths (ragged lists) and `padding=False` (the default). Fixes #44514 ## Root Cause The `mm_token_ty\u2026",
+    "changed_files": 3,
+    "cluster_id": "cluster-44514-8",
+    "cluster_ids": [
+      "cluster-44514-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44516",
+    "created_at": "2026-03-07T18:33:40Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44516/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44516",
     "labels": [],
-    "merged": true,
-    "number": 44460,
+    "merged": false,
+    "number": 44516,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "trigger tensor parallel utils test in the CI",
-    "updated_at": "2026-03-05T11:25:51Z"
+    "title": "fix(qwen2_5_vl): handle ragged batched input in apply_chat_template",
+    "updated_at": "2026-03-09T13:14:22Z"
   },
   {
-    "additions": 95,
-    "author": "weiguangli-io",
+    "additions": 1,
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44458 PR #42848 introduced a regression where `torch.compile` on `MllamaForConditionalGeneration` fails with a C++ compile error from the torch inductor backend (`'tmp2' was not declared in this scope`). The root cause is\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Fixes this failing [GPTNeoModelLanguageGenerationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500455894#step:14:1483) <img width=\"2363\" height=\"239\" alt=\"image\" src=\"https://githu\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44459",
-    "created_at": "2026-03-05T07:58:28Z",
-    "deletions": 7,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44515",
+    "created_at": "2026-03-07T18:16:35Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44459/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44459",
+    "files_url": "https://github.com/huggingface/transformers/pull/44515/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44515",
     "labels": [],
-    "merged": false,
-    "number": 44459,
-    "review_comments_count": 2,
+    "merged": true,
+    "number": 44515,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: make Mllama cross attention mask compatible with torch.compile",
-    "updated_at": "2026-03-07T13:50:40Z"
+    "title": "Fix failing `GPTNeoModelLanguageGenerationTest`",
+    "updated_at": "2026-03-09T14:11:21Z"
   },
   {
     "additions": 1,
-    "author": "Sai-Suraj-27",
+    "author": "math-hiyoko",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes this failing [DepthProModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500453624#step:14:4893). <img width=\"2231\" height=\"99\" alt=\"image\" src=\"https://github.com\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 69,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44456",
-    "created_at": "2026-03-05T06:01:06Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44513",
+    "created_at": "2026-03-07T16:11:55Z",
+    "deletions": 492,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44456/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44456",
+    "files_url": "https://github.com/huggingface/transformers/pull/44513/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44513",
     "labels": [],
     "merged": true,
-    "number": 44456,
+    "number": 44513,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failing `DepthProModelIntegrationTest`",
-    "updated_at": "2026-03-05T14:52:40Z"
+    "title": "Fix: Remove references to transformers run command",
+    "updated_at": "2026-03-09T15:37:16Z"
   },
   {
-    "additions": 3,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Check if accelerator exists before using `pin_memory`. reproduce it on a CPU only node: `python examples/pytorch/continuous_batching_simple.py` output: ``` File \"/home/jiqingfe/transformers/src/transformers/generation/continuous_batching/i\u2026",
+    "additions": 4,
+    "author": "04cb",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44355. The inspect.getsource() call raises TypeError when running compiled Python files with Cython-compiled functions. Added try-except block to gracefully handle this case by returning a default indentation level of 4.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44455",
-    "created_at": "2026-03-05T05:20:13Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44511",
+    "created_at": "2026-03-07T05:36:25Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44455/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44455",
-    "labels": [],
-    "merged": true,
-    "number": 44455,
-    "review_comments_count": 6,
+    "files_url": "https://github.com/huggingface/transformers/pull/44511/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44511",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44511,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix pin_memory for contiguous batching",
-    "updated_at": "2026-03-09T13:49:30Z"
+    "title": "Fix get_docstring_indentation_level to handle compiled functions",
+    "updated_at": "2026-03-09T13:10:06Z"
   },
   {
-    "additions": 17,
-    "author": "weiguangli-io",
+    "additions": 3,
+    "author": "math-hiyoko",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - Fixes the `_init_weights` method in `PegasusPreTrainedModel` and `MarianPreTrainedModel` to handle sinusoidal position embeddings before calling `super()._init_weights(module)`, preventing the generic `nn.Embedding` branch fro\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 27,
+    "cluster_id": "cluster-43827-6",
+    "cluster_ids": [
+      "cluster-43827-6"
+    ],
+    "cluster_role": "canonical",
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44454",
-    "created_at": "2026-03-05T03:51:38Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44510",
+    "created_at": "2026-03-06T23:37:51Z",
+    "deletions": 358,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44454/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44454",
+    "files_url": "https://github.com/huggingface/transformers/pull/44510/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44510",
     "labels": [],
-    "merged": false,
-    "number": 44454,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44510,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix Pegasus sinusoidal position embedding init regression in v5",
-    "updated_at": "2026-03-09T02:17:41Z"
+    "title": "Fix: Remove references to `text2text-generation`, `summarization` and `translation` pipeline tasks",
+    "updated_at": "2026-03-10T00:39:30Z"
   },
   {
-    "additions": 1,
-    "author": "weiguangli-io",
+    "additions": 8,
+    "author": "KartikPawade",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Fix KeyError in `convert_to_native_format` for dict vocab Fixes #44451 ### Problem `AutoTokenizer.from_pretrained(\"vesteinn/ScandiBERT\")` raises `KeyError: 0` in `convert_to_native_format`. ScandiBERT's `tokenizer_config.json` specifies\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Older OwlViT checkpoints stored `position_ids` as buffers in the text and vision embedding modules. These tensors are simple integer ranges (0 \u2192 max sequence length) and are now recomputed dynamically during initial\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44452",
-    "created_at": "2026-03-05T03:34:02Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44508",
+    "created_at": "2026-03-06T18:49:59Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44452/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44452",
+    "files_url": "https://github.com/huggingface/transformers/pull/44508/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44508",
     "labels": [],
     "merged": true,
-    "number": 44452,
+    "number": 44508,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix KeyError in convert_to_native_format for dict vocab",
-    "updated_at": "2026-03-19T13:59:23Z"
+    "title": "Fix unexpected `position_ids` keys when loading OwlViT models",
+    "updated_at": "2026-03-18T18:30:48Z"
   },
   {
-    "additions": 297,
-    "author": "sandesh-bhandari-dev",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 32,
+    "additions": 4,
+    "author": "0xDELUXA",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? `torch.distributed.fsdp` is not available in all PyTorch builds (for example, Windows ROCm). Importing it unconditionally at the top level causes an immediate crash with: ``` ModuleNotFoundError: No module named 'to\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44449",
-    "created_at": "2026-03-05T01:40:47Z",
-    "deletions": 319,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44507",
+    "created_at": "2026-03-06T18:03:49Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44449/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44449",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44449,
+    "files_url": "https://github.com/huggingface/transformers/pull/44507/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44507",
+    "labels": [],
+    "merged": true,
+    "number": 44507,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: 3 bugs : MoE aux loss, ANSI TTY leak, pipeline removed and also task error",
-    "updated_at": "2026-03-05T13:22:40Z"
+    "title": "Fix: Conditionally import `torch.distributed.fsdp` in `trainer_seq2seq.py`",
+    "updated_at": "2026-03-13T10:17:56Z"
   },
   {
-    "additions": 8,
-    "author": "yonigozlan",
+    "additions": 1,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes MiniCPM-o-2_6 related tests failures in vLLM, and improve backward compatibility with remote code in general. Cc @hmellor @zucchini-nlp",
+    "body_excerpt": "# What does this PR do? The current implementation does not work with the `mps` device and TP. ## Example script script.py ``` import os os.environ[\"PYTORCH_ENABLE_MPS_FALLBACK\"] = \"1\" import torch from transformers import AutoModelForCaus\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44447",
-    "created_at": "2026-03-04T21:55:16Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44506",
+    "created_at": "2026-03-06T18:03:33Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44447/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44447",
+    "files_url": "https://github.com/huggingface/transformers/pull/44506/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44506",
     "labels": [],
     "merged": true,
-    "number": 44447,
+    "number": 44506,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[vLLM] Fix backward compatibility with hardcoded subprocessors classes in processors",
-    "updated_at": "2026-03-05T16:07:28Z"
+    "title": "Tensor Parallelism and `mps` device",
+    "updated_at": "2026-03-11T15:16:49Z"
   },
   {
-    "additions": 4,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes fsdp loading for rank!=0 as they should stay on meta device. This was reverted in a PR that I can find anymore.",
+    "additions": 16,
+    "author": "kushalkkb",
+    "author_association": "NONE",
+    "body_excerpt": "This PR improves error handling in the load_vocab function. Changes: - Added validation to ensure vocab_file is a string path - Added check for file existence - Raised clearer FileNotFoundError when vocabulary file is missing This improves\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44446",
-    "created_at": "2026-03-04T21:38:00Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44505",
+    "created_at": "2026-03-06T17:47:37Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44446/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44446",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44505/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44505",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44446,
+    "number": 44505,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix fdsp loading meta device",
-    "updated_at": "2026-03-05T15:07:57Z"
+    "title": "Improve error handling in load_vocab for invalid vocabulary path",
+    "updated_at": "2026-03-10T04:14:31Z"
   },
   {
-    "additions": 2282,
-    "author": "gabe-l-hart",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds support for the forthcoming Granite Docling model based on the Granite 4 LLM architecture (`GraniteMoeHybrid`). ## Draft Status This PR is in draft pending the possibility of some additional changes: -\u2026",
-    "changed_files": 8,
+    "additions": 13,
+    "author": "kushalkkb",
+    "author_association": "NONE",
+    "body_excerpt": "This PR improves error handling in the load_vocab function. Changes: - Added validation to ensure vocab_file is a string path - Added check for file existence - Raised clearer FileNotFoundError when vocabulary file is missing This improves\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44445",
-    "created_at": "2026-03-04T20:54:17Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44445/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44445",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44504",
+    "created_at": "2026-03-06T17:24:10Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44504/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44504",
     "labels": [],
     "merged": false,
-    "number": 44445,
-    "review_comments_count": 44,
-    "state": "open",
-    "title": "Adding support for GraniteDoclingHybrid",
-    "updated_at": "2026-03-11T19:19:44Z"
+    "number": 44504,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Improve error handling in load_vocab for invalid vocabulary path",
+    "updated_at": "2026-03-06T17:46:17Z"
   },
   {
-    "additions": 7,
+    "additions": 8,
     "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing use case was identified and fixed in this PR: \u2192 [TOKENIZER_MAPPING_NAMES](https://github.com/huggingface/transformers/blob/main/src/transformers/models/auto/tokenization_auto.py#L63-L338) doe\u2026",
-    "changed_files": 1,
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Moonshine:** In [MoonshineEncoder.forward](https://github.com/huggingface/transformers/blob/main/src/transformers/models/moonshine/modular_moon\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44443",
-    "created_at": "2026-03-04T20:01:55Z",
-    "deletions": 7,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44503",
+    "created_at": "2026-03-06T17:08:00Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44443/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44443",
+    "files_url": "https://github.com/huggingface/transformers/pull/44503/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44503",
     "labels": [],
     "merged": true,
-    "number": 44443,
+    "number": 44503,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(tokenizer): Only strip Fast from class names in AutoTokenizer if used as a suffix",
-    "updated_at": "2026-03-09T15:03:49Z"
+    "title": "fix(testing): Fix MoonshineEncoder UnboundLocalError and Florence2VisionBackbone dtype mismatch",
+    "updated_at": "2026-03-09T18:06:17Z"
   },
   {
-    "additions": 37,
-    "author": "NielsRogge",
+    "additions": 1,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes the training of LW-DETR. It turned out that the model was not able to overfit a single batch. Hence I asked Codex to investigate this. It turns out there were 3 bugs: 1. A logits calibration gap, cause\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? As per the title. Introduced in https://github.com/huggingface/transformers/pull/44381, not sure why the CI passed",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44441",
-    "created_at": "2026-03-04T19:54:20Z",
-    "deletions": 48,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44502",
+    "created_at": "2026-03-06T17:03:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44441/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44441",
+    "files_url": "https://github.com/huggingface/transformers/pull/44502/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44502",
     "labels": [],
     "merged": true,
-    "number": 44441,
-    "review_comments_count": 4,
+    "number": 44502,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[LW-DETR] Fix training",
-    "updated_at": "2026-03-09T15:36:02Z"
+    "title": "Fix type checker",
+    "updated_at": "2026-03-06T17:09:37Z"
   },
   {
-    "additions": 11,
-    "author": "weiguangli-io",
+    "additions": 1,
+    "author": "frogNotToad",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes the noisy `HfHubHTTPError` exception output that appears when loading a transformer model from a repository that has discussions disabled. ### Root cause The `previous_pr()` function in `safetensors_conversio\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Changes the word \"maximize\" to \"minimize\" in the docs Fixes #44492 ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). ## Who can review? Anyon\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44440",
-    "created_at": "2026-03-04T18:31:13Z",
-    "deletions": 7,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44501",
+    "created_at": "2026-03-06T16:58:14Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44440/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44440",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44440,
+    "files_url": "https://github.com/huggingface/transformers/pull/44501/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44501",
+    "labels": [],
+    "merged": true,
+    "number": 44501,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: catch HfHubHTTPError in safetensors auto_conversion thread",
-    "updated_at": "2026-03-05T15:39:11Z"
+    "title": "Fixed typo in docs/source/en/kv_cache.md",
+    "updated_at": "2026-03-06T20:05:36Z"
   },
   {
-    "additions": 5,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes this failing [ProphetNetModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500453490#step:14:2331). <img width=\"2303\" height=\"165\" alt=\"image\" src=\"https://github.\u2026",
-    "changed_files": 1,
+    "additions": 18,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - Do proper type check in case jax is installed. - Make sure older torch versions don't raise typing issues",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44439",
-    "created_at": "2026-03-04T16:55:50Z",
-    "deletions": 5,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44500",
+    "created_at": "2026-03-06T16:56:12Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44439/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44439",
+    "files_url": "https://github.com/huggingface/transformers/pull/44500/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44500",
     "labels": [],
     "merged": true,
-    "number": 44439,
+    "number": 44500,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failing `ProphetNetModelIntegrationTest`",
-    "updated_at": "2026-03-05T15:43:59Z"
+    "title": "Follow-up typing checking fixes",
+    "updated_at": "2026-03-09T10:47:31Z"
   },
   {
-    "additions": 135,
-    "author": "SunMarc",
+    "additions": 11,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds flashoptim from databricks team into Trainer ! cc @tomaarsen ### Results ``` Optimizer Loss Time Speed Memory \u0394 Mem \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 AdamW Fused 1.4\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? This PR is just a small cleanup. The `TensorParallelLayer` class defines `_prepare_input_fn` and `_prepare_output_fn` as static methods. But then these methods end-up being instance or static methods in the sub clas\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44438",
-    "created_at": "2026-03-04T16:31:35Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44499",
+    "created_at": "2026-03-06T16:46:18Z",
+    "deletions": 18,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44438/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44438",
+    "files_url": "https://github.com/huggingface/transformers/pull/44499/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44499",
     "labels": [],
-    "merged": false,
-    "number": 44438,
+    "merged": true,
+    "number": 44499,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add flashoptim",
-    "updated_at": "2026-04-02T13:06:17Z"
+    "state": "closed",
+    "title": "Make `_prepare_input_fn` and `_prepare_output_fn` instance methods",
+    "updated_at": "2026-03-10T13:53:18Z"
   },
   {
-    "additions": 150,
-    "author": "ArthurZucker",
+    "additions": 1,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? Add the `neuron` backend for initialization in TP.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44437",
-    "created_at": "2026-03-04T15:34:34Z",
-    "deletions": 134,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44498",
+    "created_at": "2026-03-06T16:23:18Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44437/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44437",
+    "files_url": "https://github.com/huggingface/transformers/pull/44498/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44498",
     "labels": [],
     "merged": true,
-    "number": 44437,
-    "review_comments_count": 5,
+    "number": 44498,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "protect imports",
-    "updated_at": "2026-03-04T16:19:49Z"
+    "title": "feat: add neuron in tensor parallelism initialization",
+    "updated_at": "2026-03-12T18:07:52Z"
   },
   {
-    "additions": 8,
-    "author": "jw9603",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44423 `continuous_batching_chat_completion` was missing input preprocessing and `tokenize=True` in `apply_chat_template`, causing `'str' object has no attribute 'to'` for multimodal models. Added the same `get_model_modality` + `get\u2026",
-    "changed_files": 1,
+    "additions": 43,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/44466 and avoid issues with torch `.bin` checkpoints which always contain both keys!",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44436",
-    "created_at": "2026-03-04T15:26:48Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44497",
+    "created_at": "2026-03-06T16:21:14Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44436/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44436",
+    "files_url": "https://github.com/huggingface/transformers/pull/44497/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44497",
     "labels": [],
     "merged": true,
-    "number": 44436,
-    "review_comments_count": 4,
+    "number": 44497,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix continuous batching for multimodal models",
-    "updated_at": "2026-03-09T13:58:37Z"
+    "title": "[tie weights] \ud83d\udea8 If both weights are present with same weights, still tie them",
+    "updated_at": "2026-03-09T15:00:25Z"
   },
   {
-    "additions": 138,
-    "author": "remi-or",
+    "additions": 69,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR adds the option to have a ContinuousBatchingManager not be destroyed after generation is over. This allows the user to re-use the manager without requiring him to know any other entry point for CB apart from `generate_batch` or the\u2026",
+    "body_excerpt": "As per title, WIP",
+    "changed_files": 354,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44495",
+    "created_at": "2026-03-06T13:57:04Z",
+    "deletions": 521,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44495/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44495",
+    "labels": [],
+    "merged": false,
+    "number": 44495,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[`Gradient Ckpting`] Remove unnecessary attribute definitions",
+    "updated_at": "2026-03-06T13:58:22Z"
+  },
+  {
+    "additions": 13,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "- updates `ty` to `0.2.0` - pinned regex package (older versions did not have typing stubs) - fixed a couple of typing failures that went through via other parallel branches",
     "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44435",
-    "created_at": "2026-03-04T14:17:08Z",
-    "deletions": 54,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44494",
+    "created_at": "2026-03-06T12:57:25Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44435/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44435",
+    "files_url": "https://github.com/huggingface/transformers/pull/44494/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44494",
     "labels": [],
     "merged": true,
-    "number": 44435,
-    "review_comments_count": 2,
+    "number": 44494,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "[CB] Persistent manager",
-    "updated_at": "2026-03-26T22:02:28Z"
+    "title": "Update `ty` to 0.0.20",
+    "updated_at": "2026-03-06T13:30:25Z"
   },
   {
-    "additions": 413,
-    "author": "remi-or",
+    "additions": 439,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR adds a dedicated config for continuous batching, which is starting to have a lot parameters. This will give the user a clear view of what is possible and make adding new parameters easier. No breaking changes through `account_for_c\u2026",
-    "changed_files": 9,
+    "body_excerpt": "# What does this PR do? Since I removed some folders (fsdp, deepspeed) related to training, I need to modify the workflows !",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44434",
-    "created_at": "2026-03-04T13:49:05Z",
-    "deletions": 303,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44491",
+    "created_at": "2026-03-06T11:15:42Z",
+    "deletions": 647,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44434/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44434",
+    "files_url": "https://github.com/huggingface/transformers/pull/44491/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44491",
     "labels": [],
     "merged": true,
-    "number": 44434,
-    "review_comments_count": 12,
+    "number": 44491,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "[CB] Add dedicated config",
-    "updated_at": "2026-03-13T13:56:40Z"
+    "title": "Fix training ci and clean some tests",
+    "updated_at": "2026-03-11T16:27:57Z"
   },
   {
-    "additions": 177,
-    "author": "leopold-tzafon",
+    "additions": 4,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Instead of silently failing when mm_token_type_ids is not passed, derives it in Qwen3 and Qwen3.5. Same as it was before: https://github.com/huggingface/transformers/commit/c281a2de8998e66e93fac30a236225528531df9b P\u2026",
-    "changed_files": 18,
+    "body_excerpt": "@ArthurZucker @Cyrilvallez pls help review, thx! This PR fixes failed test case: `pytest -rA tests/models/eurobert/test_modeling_eurobert.py::EuroBertModelTest::test_model_parallelism`",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44433",
-    "created_at": "2026-03-04T13:46:14Z",
-    "deletions": 61,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44490",
+    "created_at": "2026-03-06T10:56:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44433/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44433",
+    "files_url": "https://github.com/huggingface/transformers/pull/44490/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44490",
     "labels": [],
     "merged": true,
-    "number": 44433,
+    "number": 44490,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: raise error if mm_token_type_ids not supplied ",
-    "updated_at": "2026-03-12T17:12:47Z"
+    "title": "fix model parallelism bug for eurobert model",
+    "updated_at": "2026-04-01T08:25:28Z"
   },
   {
-    "additions": 85,
-    "author": "zucchini-nlp",
+    "additions": 310,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, instead of having to divide image token by norm scale, we can do it same way as in other model (eg. gemma3) and add a custom embed layer. It should be 100% BC because users usually call `self.embed_tok\u2026",
-    "changed_files": 8,
+    "body_excerpt": "This PR makes `.ai` the single source of truth for agent templates and skills, and adds explicit `Makefile` targets to generate `Codex` and `Claude Code` specific artifacts. It contains a first skill aimed at properly dealing with typing e\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44432",
-    "created_at": "2026-03-04T10:04:40Z",
-    "deletions": 38,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44489",
+    "created_at": "2026-03-06T08:42:12Z",
+    "deletions": 62,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44432/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44432",
+    "files_url": "https://github.com/huggingface/transformers/pull/44489/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44489",
     "labels": [],
     "merged": true,
-    "number": 44432,
-    "review_comments_count": 0,
+    "number": 44489,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Make paligemma embed tokens standard",
-    "updated_at": "2026-03-11T08:38:41Z"
+    "title": "Centralize AI agent templates in `.ai`",
+    "updated_at": "2026-03-18T14:17:22Z"
   },
   {
-    "additions": 4094,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Re-opening back a PR on cleaning up clip-like model's backbones. Let's merge it now, I've been seeing quite a lot of ppl reporting it and I am not sure when it will be resolved by the big vision refactor Basically,\u2026",
-    "changed_files": 42,
+    "additions": 482,
+    "author": "abhijeet-dhumal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #44486 Adds `KubeflowCallback` to enable automatic progress and metrics reporting for training jobs running on [Kubeflow Trainer](https://github.com/kubeflow/trainer). When training runs inside a Kubeflow Trai\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 25,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44431",
-    "created_at": "2026-03-04T10:02:13Z",
-    "deletions": 2220,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44487",
+    "created_at": "2026-03-06T08:31:30Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44431/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44431",
+    "files_url": "https://github.com/huggingface/transformers/pull/44487/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44487",
     "labels": [],
     "merged": true,
-    "number": 44431,
-    "review_comments_count": 92,
+    "number": 44487,
+    "review_comments_count": 8,
     "state": "closed",
-    "title": "Refactor CLIP-like models",
-    "updated_at": "2026-04-10T13:40:31Z"
+    "title": "feat(integration): Add KubeflowCallback to enable automatic progress \u2026",
+    "updated_at": "2026-03-18T14:58:23Z"
   },
   {
-    "additions": 0,
-    "author": "Rohang2005",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? This PR fixes an inconsistency in the AFMoE module where `past_key_values` was passed to a function argument expecting `past_key_value`. The function signature expects a singular cache object (`past_key_value`), bu\u2026",
-    "changed_files": 0,
+    "additions": 691,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@IlyasMoutawwakil pls help review, thx!",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44430",
-    "created_at": "2026-03-04T08:13:38Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44430/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44430",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44430,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix inconsistent past_key_value/past_key_values usage in AFMoE modeling",
-    "updated_at": "2026-03-04T14:07:32Z"
-  },
-  {
-    "additions": 14,
-    "author": "thakoreh",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44336 The `loading_report` module was using `PALETTE['italic']` and `PALETTE['bold']` directly in string formatting, which caused ANSI escape codes to be emitted even when stdout is not connected to a terminal (e.g., when\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44336-7",
-    "cluster_ids": [
-      "cluster-44336-7"
-    ],
-    "cluster_role": "member",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44429",
-    "created_at": "2026-03-04T07:47:02Z",
-    "deletions": 6,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44482",
+    "created_at": "2026-03-06T02:39:41Z",
+    "deletions": 332,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44429/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44429",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44429,
+    "files_url": "https://github.com/huggingface/transformers/pull/44482/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44482",
+    "labels": [],
+    "merged": true,
+    "number": 44482,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix ANSI codes emitted in loading_report when stdout is not a TTY",
-    "updated_at": "2026-03-04T13:58:46Z"
+    "title": "add XPU Expectations for higgs_audio_v2 tests",
+    "updated_at": "2026-04-09T02:32:48Z"
   },
   {
-    "additions": 10,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil pls help review, thx!",
-    "changed_files": 2,
+    "additions": 2353,
+    "author": "XingyuHu109",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR adds native Transformers support for DeepSeek-V3.2. It introduces a new `deepseek_v32` model family so the official checkpoints resolve through the standard auto classes without `trust_remote_code`. The implementation ke\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44428",
-    "created_at": "2026-03-04T07:41:20Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44481",
+    "created_at": "2026-03-05T21:14:38Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44428/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44428",
+    "files_url": "https://github.com/huggingface/transformers/pull/44481/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44481",
     "labels": [],
-    "merged": true,
-    "number": 44428,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Add XPU Expectations for vibe voice acoustic tokenizer tests",
-    "updated_at": "2026-04-02T03:21:38Z"
+    "merged": false,
+    "number": 44481,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "Add native DeepSeek-V3.2 support",
+    "updated_at": "2026-03-12T16:02:46Z"
   },
   {
-    "additions": 43,
-    "author": "Jaredw2289-svg",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44297 ## Problem `tokenizer.save_pretrained()` overwrites `tokenizer_class` in `tokenizer_config.json` with the current wrapper class (e.g. `PreTrainedTokenizerFast`) instead of preserving the original class from the loaded config (\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? add `diffusers` to docker file for `VibeVoice` (added in PR #40546).",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44427",
-    "created_at": "2026-03-04T06:03:56Z",
-    "deletions": 6,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44480",
+    "created_at": "2026-03-05T20:54:07Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44427/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44427",
+    "files_url": "https://github.com/huggingface/transformers/pull/44480/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44480",
     "labels": [],
-    "merged": false,
-    "number": 44427,
+    "merged": true,
+    "number": 44480,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(tokenization): preserve original tokenizer_class in save_pretrained",
-    "updated_at": "2026-03-11T02:59:12Z"
+    "title": "Add `diffusers` to CI docker file",
+    "updated_at": "2026-03-05T21:11:17Z"
   },
   {
-    "additions": 29,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil Can you help review? Thx!",
-    "changed_files": 1,
+    "additions": 116,
+    "author": "BenjaminBossan",
+    "author_association": "MEMBER",
+    "body_excerpt": "Required fixes: - some code was using unordered data structures, making weight order random - adjust alpha to offset increased rank from fusion - import functions from PEFT if available See https://github.com/huggingface/peft/pull/3083.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44426",
-    "created_at": "2026-03-04T05:57:34Z",
-    "deletions": 10,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44478",
+    "created_at": "2026-03-05T17:19:31Z",
+    "deletions": 26,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44426/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44426",
+    "files_url": "https://github.com/huggingface/transformers/pull/44478/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44478",
     "labels": [],
     "merged": true,
-    "number": 44426,
-    "review_comments_count": 2,
+    "number": 44478,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "update the expected output for qwen2_5_vl w/ pytorch 2.10 XPU",
-    "updated_at": "2026-04-09T02:32:41Z"
+    "title": "[WIP] FIX Make Mixtral LoRA loading work",
+    "updated_at": "2026-03-11T17:44:20Z"
   },
   {
     "additions": 1,
-    "author": "qgallouedec",
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "I believe the second `if` should be `elif` so the else branch only triggers when neither the string-truncation NOR the float-formatting conditions apply. Otherwise it overwrites the truncation message with the original long string.",
+    "body_excerpt": "# What does this PR do? As per the title. It's quite a random rule to fix https://huggingface.co/fixie-ai/ultravox-v0_5-llama-3_2-1b to be honest",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44425",
-    "created_at": "2026-03-04T02:48:00Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44477",
+    "created_at": "2026-03-05T16:58:29Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44425/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44425",
+    "files_url": "https://github.com/huggingface/transformers/pull/44477/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44477",
     "labels": [],
     "merged": false,
-    "number": 44425,
+    "number": 44477,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix conditional check for float formatting",
-    "updated_at": "2026-03-04T02:48:41Z"
+    "state": "closed",
+    "title": "[vllm compat] Fix remote code inits",
+    "updated_at": "2026-03-11T10:34:06Z"
   },
   {
-    "additions": 6,
-    "author": "jw9603",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes `AttributeError: 'str' object has no attribute 'to'` when using `transformers serve --continuous-batching` with multimodal models like Qwen3.5-9B. `processor.apply_chat_template()` returns a plain string (not\u2026",
+    "additions": 4,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "I made an oversight in the fix at #43981 - I didn't realize the dim order changed for torch, so the test was still flaky for torch tensors. The fix reduced the flaky frequency a lot so I thought it had been fixed, but actually it's still t\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44424",
-    "created_at": "2026-03-04T00:56:08Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44476",
+    "created_at": "2026-03-05T16:39:44Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44424/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44424",
+    "files_url": "https://github.com/huggingface/transformers/pull/44476/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44476",
     "labels": [],
-    "merged": false,
-    "number": 44424,
+    "merged": true,
+    "number": 44476,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `transformers serve --continuous-batching` for multimodal models",
-    "updated_at": "2026-03-05T09:16:25Z"
+    "title": "Fix Llava tests for torch too!",
+    "updated_at": "2026-03-11T16:47:05Z"
   },
   {
-    "additions": 117,
-    "author": "mitre88",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds a Spanish (es) translation of the `conversations.md` guide, which covers the fundamentals of using chat models in Transformers. ### Translated sections: - Chat CLI usage - TextGenerationPipeline in chat mode -\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "chameleon added to MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44422",
-    "created_at": "2026-03-04T00:42:43Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44475",
+    "created_at": "2026-03-05T16:29:18Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44422/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44422",
+    "files_url": "https://github.com/huggingface/transformers/pull/44475/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44475",
     "labels": [],
     "merged": true,
-    "number": 44422,
+    "number": 44475,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: add Spanish translation for conversations.md (chat basics)",
-    "updated_at": "2026-03-04T16:45:24Z"
+    "title": "chameleon added to MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS",
+    "updated_at": "2026-03-09T22:33:20Z"
   },
   {
-    "additions": 309,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? When we shard weights according to a TP plan, we do not update the corresponding parent module attributes. For instance if we shard the weight of a `torch.nn.Linear`, we should also update its `in_features` or `out_\u2026",
-    "changed_files": 4,
+    "additions": 875,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix https://github.com/QwenLM/Qwen3.5/issues/58. In the latest code, Qwen3VL and Qwen3.5 use the same `get_rope_index` func of Qwen2VL. But they should be different since Qwen3VL/Qwen3.5 introduce text timestamps. T\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44421",
-    "created_at": "2026-03-03T22:51:47Z",
-    "deletions": 5,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44474",
+    "created_at": "2026-03-05T15:46:09Z",
+    "deletions": 107,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44421/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44421",
+    "files_url": "https://github.com/huggingface/transformers/pull/44474/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44474",
     "labels": [],
     "merged": true,
-    "number": 44421,
-    "review_comments_count": 0,
+    "number": 44474,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "Update parent module attributes when sharding with TP",
-    "updated_at": "2026-03-05T23:32:06Z"
+    "title": "[Bugfix] fix video inference of qwen3vl and qwen3.5 series",
+    "updated_at": "2026-03-10T09:52:44Z"
   },
   {
-    "additions": 249,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "- removes \"Number of accelerators\" section from \"Accelerator selection\" guide since this is probably pretty commonly known - add a new \"DDP\" guide - refactored \"Accelerate\" guide with a more focused overview of what it is and how to config\u2026",
-    "changed_files": 5,
+    "additions": 137,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? supersedes #44446 on `main`, when loading to cpu and using meta devices for non-rank0 processes, it now re-initializes weights on those processes as well as uses more CPU memory. In testing with loading llama3-8b. m\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44420",
-    "created_at": "2026-03-03T22:41:59Z",
-    "deletions": 250,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44473",
+    "created_at": "2026-03-05T14:52:15Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44420/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44420",
+    "files_url": "https://github.com/huggingface/transformers/pull/44473/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44473",
     "labels": [],
-    "merged": false,
-    "number": 44420,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[docs] distributed training",
-    "updated_at": "2026-03-11T17:36:12Z"
+    "merged": true,
+    "number": 44473,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "fix FSDP loading with meta devices",
+    "updated_at": "2026-03-09T15:46:22Z"
   },
   {
-    "additions": 6,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? To be merged after #44302 and https://github.com/huggingface/kernels/pull/285. It adds the `neuron` device in checks for custom kernels, enabling to load kernels for Neuron devices.",
+    "additions": 13,
+    "author": "jblox26",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this fix? Running video inference with any `Qwen3VL` model raises `StopIteration` during `model.generate()`: ``` File \".../transformers/models/qwen3_vl/modeling_qwen3_vl.py\", line 1126, in get_rope_index grid_thw = next(grid_i\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44417",
-    "created_at": "2026-03-03T20:15:26Z",
-    "deletions": 6,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44472",
+    "created_at": "2026-03-05T14:50:06Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44417/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44417",
+    "files_url": "https://github.com/huggingface/transformers/pull/44472/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44472",
     "labels": [],
-    "merged": true,
-    "number": 44417,
+    "merged": false,
+    "number": 44472,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Neuron kernels integration",
-    "updated_at": "2026-03-05T17:09:39Z"
+    "title": "Fix Qwen3VL get_rope_index StopIteration with per-frame video tokens",
+    "updated_at": "2026-03-06T15:15:58Z"
   },
   {
-    "additions": 1,
-    "author": "tyler-romero",
+    "additions": 50,
+    "author": "weiguangli-io",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Register `olmo_hybrid` in `TOKENIZER_MAPPING_NAMES` so auto-tokenizer resolution works, matching the other auto-registrations already in place for this model.",
-    "changed_files": 1,
+    "body_excerpt": "## What does this PR do? Fixes #44466 After `.to(device)`, PyTorch's `Module._apply` may create new `Parameter` objects that no longer share storage with tied weights. This caused `remove_tied_weights_from_state_dict` to fail to detect and\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44416",
-    "created_at": "2026-03-03T19:30:56Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44471",
+    "created_at": "2026-03-05T14:30:17Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44416/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44416",
-    "labels": [],
-    "merged": true,
-    "number": 44416,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "[tiny] Add olmo_hybrid to tokenizer auto-mapping",
-    "updated_at": "2026-03-04T19:26:10Z"
-  },
-  {
-    "additions": 2,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR removes @MekkCyber from the PR template. cc @Rocketknight1 you only need to ping me now ;)",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44415",
-    "created_at": "2026-03-03T16:59:08Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44415/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44415",
-    "labels": [],
-    "merged": true,
-    "number": 44415,
+    "files_url": "https://github.com/huggingface/transformers/pull/44471/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44471",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44471,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update PR template",
-    "updated_at": "2026-03-04T14:13:04Z"
+    "title": "Fix tied weights serialization being device-dependent",
+    "updated_at": "2026-03-06T14:03:18Z"
   },
   {
-    "additions": 35,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44303 - see also comments here https://github.com/huggingface/transformers/pull/44316#issuecomment-3984362089. Supersedes https://github.com/huggingface/trans\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44414",
-    "created_at": "2026-03-03T16:47:47Z",
-    "deletions": 39,
+    "additions": 8,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #44360 The reference `fp8_index` kernel clamps per-head q\u00b7k scores with `T.max(logits, 0)` before the weighted sum across heads ([kernel.py#L241](https://huggingface.co/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/kernel.py#L241\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44360-6",
+    "cluster_ids": [
+      "cluster-44360-6"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44470",
+    "created_at": "2026-03-05T14:02:05Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44414/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44414",
-    "labels": [],
-    "merged": true,
-    "number": 44414,
+    "files_url": "https://github.com/huggingface/transformers/pull/44470/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44470",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44470,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Reduce tqdm verbosity during model loading",
-    "updated_at": "2026-03-03T16:57:56Z"
+    "title": "Add missing ReLU in GlmMoeDsaIndexer",
+    "updated_at": "2026-03-05T15:39:38Z"
   },
   {
     "additions": 4,
     "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title.",
+    "body_excerpt": "# What does this PR do? For remote code that behave correctly with tied weights, we need to keep the same behavior as for the main lib, i.e. not remove them from tied weights (as tied weights are marked as missing to avoid inits!!)",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44413",
-    "created_at": "2026-03-03T16:24:43Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44469",
+    "created_at": "2026-03-05T13:51:55Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44413/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44413",
+    "files_url": "https://github.com/huggingface/transformers/pull/44469/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44469",
     "labels": [],
     "merged": true,
-    "number": 44413,
+    "number": 44469,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix peft conversion mappings",
-    "updated_at": "2026-03-03T17:08:39Z"
+    "title": "[remote code/vllm] Fix incorrect tied weights",
+    "updated_at": "2026-03-05T15:07:56Z"
   },
   {
-    "additions": 138,
-    "author": "tarekziade",
+    "additions": 13,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Extends type checking to `src/transformers/quantizers`",
-    "changed_files": 28,
+    "body_excerpt": "Replace placeholder tokens as specified in added_tokens_decoder if we have added_tokens_decoder with specific token_ids, we need to overwrite them in spm model ! example: [UNUSED_TOKEN_146] -> <|im_start|> see internlm2: https://huggingfac\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 25,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44412",
-    "created_at": "2026-03-03T14:53:31Z",
-    "deletions": 74,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44468",
+    "created_at": "2026-03-05T13:48:56Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44412/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44412",
+    "files_url": "https://github.com/huggingface/transformers/pull/44468/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44468",
     "labels": [],
     "merged": true,
-    "number": 44412,
-    "review_comments_count": 33,
+    "number": 44468,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "chore(typing): Add type checking to `src/transformers/quantizers`",
-    "updated_at": "2026-03-11T11:24:11Z"
+    "title": "Replace placeholder tokens as specified in added_tokens_decoder",
+    "updated_at": "2026-03-05T16:29:13Z"
   },
   {
-    "additions": 59,
-    "author": "burtenshaw",
+    "additions": 346,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR updates `AGENTS.md` to discourage duplicated and trivial work by agents. - CLAUDE.md-> AGENTS.md - ssue-thread coordination before PRs - mandatory duplicate-PR checks with gh commands - no one-off busywork P\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44411",
-    "created_at": "2026-03-03T11:35:13Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44411/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44411",
-    "labels": [],
-    "merged": true,
-    "number": 44411,
-    "review_comments_count": 4,
-    "state": "closed",
-    "title": "Update agentic contributions guidelines in AGENTS.md to force yielding.",
-    "updated_at": "2026-03-12T09:28:45Z"
-  },
-  {
-    "additions": 38,
-    "author": "zvik",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? This PR allows the Granite-speech model to use hidden states from the encoder hidden layers. This is an internal model option that is required for the next generation of Granite-speech models. ## Changes: - New conf\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Replace placeholder tokens as specified in added_tokens_decoder if we have `added_tokens_decoder` with specific token_ids, we need to overwrite them in spm model ! `example: [UNUSED_TOKEN_146] -> <|im_start|>` see internlm2: https://huggin\u2026",
+    "changed_files": 24,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44408",
-    "created_at": "2026-03-03T07:50:39Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44467",
+    "created_at": "2026-03-05T13:44:54Z",
+    "deletions": 204,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44408/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44408",
+    "files_url": "https://github.com/huggingface/transformers/pull/44467/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44467",
     "labels": [],
     "merged": false,
-    "number": 44408,
-    "review_comments_count": 5,
+    "number": 44467,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Add option to export  encoder hidden states for Granite-speech",
-    "updated_at": "2026-04-12T11:07:21Z"
+    "title": "Placeholder tokens update",
+    "updated_at": "2026-03-05T13:47:28Z"
   },
   {
-    "additions": 23,
-    "author": "hongping-zh",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Adds an \"Energy Efficiency Considerations\" section to the bitsandbytes quantization documentation, providing practical guidance on the energy implications of different quantization configurations. ## Motivation This addresses th\u2026",
-    "changed_files": 1,
+    "additions": 20,
+    "author": "kashif",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix the loss calculation; we should calculate it on scaled targets. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44407",
-    "created_at": "2026-03-03T04:42:57Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44465",
+    "created_at": "2026-03-05T12:59:23Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44407/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44407",
-    "labels": [],
-    "merged": false,
-    "number": 44407,
+    "files_url": "https://github.com/huggingface/transformers/pull/44465/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44465",
+    "labels": [
+      "bug"
+    ],
+    "merged": true,
+    "number": 44465,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "docs: add energy efficiency considerations to bitsandbytes quantization guide",
-    "updated_at": "2026-03-25T11:53:49Z"
+    "state": "closed",
+    "title": "[timesfm2_5] fix loss scaling",
+    "updated_at": "2026-03-05T14:50:26Z"
   },
   {
-    "additions": 21,
-    "author": "medhakimbedhief",
+    "additions": 16,
+    "author": "weiguangli-io",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Adds sequence-classification support for Qwen3.5 in AutoModelForSequenceClassification. **What does this PR do?** This PR enables loading Qwen3.5 checkpoints with `AutoModelForSequenceClassification`, which previously failed with: `ValueEr\u2026",
-    "changed_files": 5,
+    "body_excerpt": "## What does this PR do? Fixes #44462 When a model's `model_type` (e.g. `\"llama\"`) has no entry in `TOKENIZER_MAPPING_NAMES`, `AutoTokenizer.from_pretrained` falls through to loading the tokenizer class declared in `tokenizer_config.json`\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44406",
-    "created_at": "2026-03-03T03:44:37Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44463",
+    "created_at": "2026-03-05T12:45:57Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44406/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44406",
+    "files_url": "https://github.com/huggingface/transformers/pull/44463/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44463",
     "labels": [],
-    "merged": true,
-    "number": 44406,
+    "merged": false,
+    "number": 44463,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add Qwen3.5 support for sequence classification",
-    "updated_at": "2026-04-08T11:33:19Z"
+    "title": "Fix AutoTokenizer ignoring tokenizer.json for unregistered model types",
+    "updated_at": "2026-03-07T13:50:44Z"
   },
   {
-    "additions": 0,
-    "author": "Rocketknight1",
+    "additions": 12,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "Some generate tests have a ~1% chance of generating short outputs because they hit an EOS token early, which causes the test to flake because it asserts the output shape. This PR enforces `min_length` so that doesn't happen!",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? for SP loss we do not have torch device mesh but rather a deepspeed only. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44401",
-    "created_at": "2026-03-02T18:26:56Z",
-    "deletions": 77,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44461",
+    "created_at": "2026-03-05T11:39:02Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44401/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44401",
+    "files_url": "https://github.com/huggingface/transformers/pull/44461/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44461",
     "labels": [],
     "merged": true,
-    "number": 44401,
-    "review_comments_count": 4,
+    "number": 44461,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Enforce min length in some generate tests",
-    "updated_at": "2026-03-04T14:05:50Z"
+    "title": "[Trainer] fix SP loss",
+    "updated_at": "2026-03-05T13:00:40Z"
   },
   {
-    "additions": 14,
-    "author": "Kokonico",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? As per title, confirms interactive terminal before adding formatting to loading_report output. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear\u2026",
+    "additions": 1,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
     "changed_files": 1,
-    "cluster_id": "cluster-44336-7",
-    "cluster_ids": [
-      "cluster-44336-7"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44400",
-    "created_at": "2026-03-02T18:21:48Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44400/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44400",
-    "labels": [],
-    "merged": false,
-    "number": 44400,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: interactive terminal checks for formatting in loading_report.py",
-    "updated_at": "2026-03-09T14:46:29Z"
-  },
-  {
-    "additions": 3,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44384 In `Qwen3_5TextModel.forward`, after splitting `position_ids` into `text_position_ids` (index 0, for text) and `position_ids` (indices 1:, for temporal/height/width), the decoder layer call incorrectly passed `posit\u2026",
-    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44399",
-    "created_at": "2026-03-02T17:28:59Z",
-    "deletions": 3,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44460",
+    "created_at": "2026-03-05T10:53:07Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44399/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44399",
+    "files_url": "https://github.com/huggingface/transformers/pull/44460/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44460",
     "labels": [],
     "merged": true,
-    "number": 44399,
+    "number": 44460,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix position_ids typo in Qwen3_5TextModel forward pass",
-    "updated_at": "2026-03-06T01:48:22Z"
+    "title": "trigger tensor parallel utils test in the CI",
+    "updated_at": "2026-03-05T11:25:51Z"
   },
   {
-    "additions": 4,
+    "additions": 95,
     "author": "weiguangli-io",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds the missing ReLU activation in `GlmMoeDsaIndexer.forward()` on per-head q\u00b7k scores before the weighted sum across heads. The reference DeepSeek-V3.2 `fp8_index` kernel applies `T.max(logits, 0)` (i.e., ReLU) a\u2026",
+    "body_excerpt": "## Summary Fixes #44458 PR #42848 introduced a regression where `torch.compile` on `MllamaForConditionalGeneration` fails with a C++ compile error from the torch inductor backend (`'tmp2' was not declared in this scope`). The root cause is\u2026",
     "changed_files": 2,
-    "cluster_id": "cluster-44360-6",
-    "cluster_ids": [
-      "cluster-44360-6"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44398",
-    "created_at": "2026-03-02T16:41:01Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44398/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44398",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44398,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add missing ReLU in GlmMoeDsaIndexer scoring",
-    "updated_at": "2026-03-04T13:54:20Z"
-  },
-  {
-    "additions": 86,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44380 `GPT2Attention.forward()` did not pass the `scaling` parameter to `attention_interface`, causing `scale_attn_weights` and `scale_attn_by_inverse_layer_idx` config options to be silently ignored when usi\u2026",
-    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44397",
-    "created_at": "2026-03-02T16:14:37Z",
-    "deletions": 50,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44459",
+    "created_at": "2026-03-05T07:58:28Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44397/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44397",
+    "files_url": "https://github.com/huggingface/transformers/pull/44459/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44459",
     "labels": [],
-    "merged": true,
-    "number": 44397,
-    "review_comments_count": 17,
+    "merged": false,
+    "number": 44459,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix GPT2 attention scaling ignored in SDPA/FlashAttention",
-    "updated_at": "2026-03-04T16:47:42Z"
+    "title": "fix: make Mllama cross attention mask compatible with torch.compile",
+    "updated_at": "2026-03-07T13:50:40Z"
   },
   {
-    "additions": 3,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Some renaming should just never be applied when the weight format already matches. (this is actually regardless of remote code). This allows us to remove 1 test added in https://github.com/huggingface/transformers/c\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes this failing [DepthProModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500453624#step:14:4893). <img width=\"2231\" height=\"99\" alt=\"image\" src=\"https://github.com\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44396",
-    "created_at": "2026-03-02T15:50:27Z",
-    "deletions": 16,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44456",
+    "created_at": "2026-03-05T06:01:06Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44396/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44396",
+    "files_url": "https://github.com/huggingface/transformers/pull/44456/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44456",
     "labels": [],
     "merged": true,
-    "number": 44396,
-    "review_comments_count": 2,
+    "number": 44456,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[ Dynamic weight loader] fix remote code when format matches",
-    "updated_at": "2026-03-03T17:53:39Z"
+    "title": "Fix failing `DepthProModelIntegrationTest`",
+    "updated_at": "2026-03-05T14:52:40Z"
   },
   {
-    "additions": 153,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title!",
-    "changed_files": 6,
+    "additions": 3,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Check if accelerator exists before using `pin_memory`. reproduce it on a CPU only node: `python examples/pytorch/continuous_batching_simple.py` output: ``` File \"/home/jiqingfe/transformers/src/transformers/generation/continuous_batching/i\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44395",
-    "created_at": "2026-03-02T14:52:12Z",
-    "deletions": 31,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44455",
+    "created_at": "2026-03-05T05:20:13Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44395/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44395",
+    "files_url": "https://github.com/huggingface/transformers/pull/44455/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44455",
     "labels": [],
     "merged": true,
-    "number": 44395,
-    "review_comments_count": 22,
+    "number": 44455,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Fix kernels security issue",
-    "updated_at": "2026-03-03T15:28:20Z"
+    "title": "fix pin_memory for contiguous batching",
+    "updated_at": "2026-03-09T13:49:30Z"
   },
   {
-    "additions": 4742,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? #43514 precedes this PR",
-    "changed_files": 61,
+    "additions": 17,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - Fixes the `_init_weights` method in `PegasusPreTrainedModel` and `MarianPreTrainedModel` to handle sinusoidal position embeddings before calling `super()._init_weights(module)`, preventing the generic `nn.Embedding` branch fro\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44394",
-    "created_at": "2026-03-02T14:49:05Z",
-    "deletions": 7232,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44394/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44394",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44454",
+    "created_at": "2026-03-05T03:51:38Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44454/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44454",
     "labels": [],
     "merged": false,
-    "number": 44394,
+    "number": 44454,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "\ud83d\udea8\ud83d\udea7 FeatureExtractor \u2192 AudioProcessor",
-    "updated_at": "2026-04-03T22:16:41Z"
+    "state": "closed",
+    "title": "Fix Pegasus sinusoidal position embedding init regression in v5",
+    "updated_at": "2026-03-09T02:17:41Z"
   },
   {
-    "additions": 61,
-    "author": "ebezzam",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The acoustic tokenizer was updated to use `VoxtralRealtimeConv1dPaddingCache` in #43625 but the ASR model wasn't updated.",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Fix KeyError in `convert_to_native_format` for dict vocab Fixes #44451 ### Problem `AutoTokenizer.from_pretrained(\"vesteinn/ScandiBERT\")` raises `KeyError: 0` in `convert_to_native_format`. ScandiBERT's `tokenizer_config.json` specifies\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44392",
-    "created_at": "2026-03-02T13:41:19Z",
-    "deletions": 100,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44452",
+    "created_at": "2026-03-05T03:34:02Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44392/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44392",
+    "files_url": "https://github.com/huggingface/transformers/pull/44452/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44452",
     "labels": [],
     "merged": true,
-    "number": 44392,
+    "number": 44452,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[VibeVoice ASR] Use updated padding cache for ASR model.",
-    "updated_at": "2026-03-02T13:51:02Z"
+    "title": "Fix KeyError in convert_to_native_format for dict vocab",
+    "updated_at": "2026-03-19T13:59:23Z"
   },
   {
-    "additions": 3040,
-    "author": "ebezzam",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Tokenizer was updated to Voxtral cache object in #43625, but forgot to update that of the ASR model",
-    "changed_files": 30,
+    "additions": 297,
+    "author": "sandesh-bhandari-dev",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 32,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44391",
-    "created_at": "2026-03-02T13:29:59Z",
-    "deletions": 311,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44449",
+    "created_at": "2026-03-05T01:40:47Z",
+    "deletions": 319,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44391/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44391",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44449/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44449",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44391,
+    "number": 44449,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[VibeVoice ASR] Use newer cache object for modular",
-    "updated_at": "2026-03-02T13:34:23Z"
+    "title": "fix: 3 bugs : MoE aux loss, ANSI TTY leak, pipeline removed and also task error",
+    "updated_at": "2026-03-05T13:22:40Z"
   },
   {
-    "additions": 3330,
-    "author": "liding-nv",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 13,
+    "additions": 8,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes MiniCPM-o-2_6 related tests failures in vLLM, and improve backward compatibility with remote code in general. Cc @hmellor @zucchini-nlp",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44390",
-    "created_at": "2026-03-02T13:22:21Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44447",
+    "created_at": "2026-03-04T21:55:16Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44390/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44390",
+    "files_url": "https://github.com/huggingface/transformers/pull/44447/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44447",
     "labels": [],
     "merged": true,
-    "number": 44390,
-    "review_comments_count": 20,
+    "number": 44447,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "add support for nemotron_3",
-    "updated_at": "2026-03-03T18:18:50Z"
+    "title": "[vLLM] Fix backward compatibility with hardcoded subprocessors classes in processors",
+    "updated_at": "2026-03-05T16:07:28Z"
   },
   {
-    "additions": 5,
-    "author": "Abdennacer-Badaoui",
+    "additions": 4,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "Adds explicit `timm` installation to the AMD ROCm Docker image. This causes ~200 test failures in AMD CI (e.g., [gemma3n vision tests](https://github.com/huggingface/transformers/actions/runs/22474359922/job/65104428291)). This mirrors wha\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? This PR fixes fsdp loading for rank!=0 as they should stay on meta device. This was reverted in a PR that I can find anymore.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44389",
-    "created_at": "2026-03-02T13:09:55Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44446",
+    "created_at": "2026-03-04T21:38:00Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44389/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44389",
+    "files_url": "https://github.com/huggingface/transformers/pull/44446/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44446",
     "labels": [],
-    "merged": true,
-    "number": 44389,
+    "merged": false,
+    "number": 44446,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[AMD CI] Add missing timm dependency to ROCm Docker image",
-    "updated_at": "2026-03-03T12:00:19Z"
+    "title": "Fix fdsp loading meta device",
+    "updated_at": "2026-03-05T15:07:57Z"
   },
   {
-    "additions": 0,
-    "author": "sahilmaniyar888",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes #44336 ### Summary This PR prevents ANSI style escape sequences from being emitted by `loading_report` when stdout is non-interactive (for example, redirected logs/files). ### Changes - Added a small helper `_\u2026",
-    "changed_files": 0,
-    "cluster_id": "cluster-44336-7",
-    "cluster_ids": [
-      "cluster-44336-7"
-    ],
-    "cluster_role": "member",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44388",
-    "created_at": "2026-03-02T11:40:49Z",
+    "additions": 2282,
+    "author": "gabe-l-hart",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for the forthcoming Granite Docling model based on the Granite 4 LLM architecture (`GraniteMoeHybrid`). ## Draft Status This PR is in draft pending the possibility of some additional changes: -\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44445",
+    "created_at": "2026-03-04T20:54:17Z",
     "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44388/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44388",
-    "labels": [
-      "Code agent slop"
-    ],
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44445/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44445",
+    "labels": [],
     "merged": false,
-    "number": 44388,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix loading report ANSI styles for non-TTY output",
-    "updated_at": "2026-03-11T06:29:31Z"
+    "number": 44445,
+    "review_comments_count": 44,
+    "state": "open",
+    "title": "Adding support for GraniteDoclingHybrid",
+    "updated_at": "2026-03-11T19:19:44Z"
   },
   {
-    "additions": 62,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title",
-    "changed_files": 2,
+    "additions": 7,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing use case was identified and fixed in this PR: \u2192 [TOKENIZER_MAPPING_NAMES](https://github.com/huggingface/transformers/blob/main/src/transformers/models/auto/tokenization_auto.py#L63-L338) doe\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44386",
-    "created_at": "2026-03-02T10:43:13Z",
-    "deletions": 16,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44443",
+    "created_at": "2026-03-04T20:01:55Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44386/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44386",
+    "files_url": "https://github.com/huggingface/transformers/pull/44443/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44443",
     "labels": [],
     "merged": true,
-    "number": 44386,
-    "review_comments_count": 2,
+    "number": 44443,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[higgs-audio-v2] fix sampling",
-    "updated_at": "2026-03-02T13:06:23Z"
+    "title": "fix(tokenizer): Only strip Fast from class names in AutoTokenizer if used as a suffix",
+    "updated_at": "2026-03-09T15:03:49Z"
   },
   {
-    "additions": 8,
+    "additions": 37,
     "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? I wasn't able to run make check-repo locally successfully, unless the following 3 fixes were applied.",
+    "body_excerpt": "# What does this PR do? This PR fixes the training of LW-DETR. It turned out that the model was not able to overfit a single batch. Hence I asked Codex to investigate this. It turns out there were 3 bugs: 1. A logits calibration gap, cause\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44385",
-    "created_at": "2026-03-02T09:45:15Z",
-    "deletions": 5,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44441",
+    "created_at": "2026-03-04T19:54:20Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44385/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44385",
+    "files_url": "https://github.com/huggingface/transformers/pull/44441/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44441",
     "labels": [],
-    "merged": false,
-    "number": 44385,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix make check-repo",
-    "updated_at": "2026-03-02T09:54:23Z"
+    "merged": true,
+    "number": 44441,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "[LW-DETR] Fix training",
+    "updated_at": "2026-03-09T15:36:02Z"
   },
   {
-    "additions": 4,
-    "author": "JJJYmmm",
+    "additions": 11,
+    "author": "weiguangli-io",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix the attr `_no_split_modules` of `Qwen3_5Model` and `Qwen3_5MoeModel`, which affect the FSDP init of hf Trainer.",
-    "changed_files": 3,
+    "body_excerpt": "## What does this PR do? Fixes the noisy `HfHubHTTPError` exception output that appears when loading a transformer model from a repository that has discussions disabled. ### Root cause The `previous_pr()` function in `safetensors_conversio\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44382",
-    "created_at": "2026-03-02T05:42:48Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44440",
+    "created_at": "2026-03-04T18:31:13Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44382/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44382",
-    "labels": [],
-    "merged": true,
-    "number": 44382,
+    "files_url": "https://github.com/huggingface/transformers/pull/44440/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44440",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44440,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Bugfix] fix qwen3.5 no split module",
-    "updated_at": "2026-03-02T16:17:22Z"
+    "title": "fix: catch HfHubHTTPError in safetensors auto_conversion thread",
+    "updated_at": "2026-03-05T15:39:11Z"
   },
   {
-    "additions": 2,
-    "author": "carcel-yu",
+    "additions": 5,
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? This PR adds MLU support to `is_torch_bf16_gpu_available()` by checking `torch.mlu.is_bf16_supported()` when an MLU device is available. ### Why is this needed? MLU devices support bf16 training, but they are curr\u2026",
+    "body_excerpt": "# What does this PR do? Fixes this failing [ProphetNetModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500453490#step:14:2331). <img width=\"2303\" height=\"165\" alt=\"image\" src=\"https://github.\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44381",
-    "created_at": "2026-03-02T05:34:49Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44439",
+    "created_at": "2026-03-04T16:55:50Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44381/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44381",
+    "files_url": "https://github.com/huggingface/transformers/pull/44439/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44439",
     "labels": [],
     "merged": true,
-    "number": 44381,
+    "number": 44439,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add MLU bf16 support to is_torch_bf16_gpu_available",
-    "updated_at": "2026-03-06T14:34:30Z"
+    "title": "Fix failing `ProphetNetModelIntegrationTest`",
+    "updated_at": "2026-03-05T15:43:59Z"
   },
   {
-    "additions": 579,
-    "author": "remi-or",
+    "additions": 135,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# Summary ! This PR is in draft, waiting for https://github.com/huggingface/transformers/pull/44227 to be merged This PR adds support for the `flash_attention_with_kvcache` kernel in continuoys batching. This is very efficient for decode-o\u2026",
-    "changed_files": 14,
+    "body_excerpt": "# What does this PR do? This PR adds flashoptim from databricks team into Trainer ! cc @tomaarsen ### Results ``` Optimizer Loss Time Speed Memory \u0394 Mem \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 AdamW Fused 1.4\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44379",
-    "created_at": "2026-03-01T23:13:17Z",
-    "deletions": 235,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44438",
+    "created_at": "2026-03-04T16:31:35Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44379/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44379",
+    "files_url": "https://github.com/huggingface/transformers/pull/44438/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44438",
     "labels": [],
-    "merged": true,
-    "number": 44379,
-    "review_comments_count": 19,
-    "state": "closed",
-    "title": "[CB] Add paged_attention kernel",
-    "updated_at": "2026-03-09T22:16:31Z"
+    "merged": false,
+    "number": 44438,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add flashoptim",
+    "updated_at": "2026-04-02T13:06:17Z"
   },
   {
-    "additions": 1,
-    "author": "redpanda1995",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "additions": 150,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
     "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44378",
-    "created_at": "2026-03-01T22:57:50Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44437",
+    "created_at": "2026-03-04T15:34:34Z",
+    "deletions": 134,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44378/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44378",
+    "files_url": "https://github.com/huggingface/transformers/pull/44437/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44437",
     "labels": [],
-    "merged": false,
-    "number": 44378,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44437,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Fix logging with each layer with ms-swift lora fine-tuning ",
-    "updated_at": "2026-03-02T14:18:22Z"
+    "title": "protect imports",
+    "updated_at": "2026-03-04T16:19:49Z"
   },
   {
-    "additions": 175,
-    "author": "redpanda1995",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes TODO: Implement proper TP support for compressed tensors quantization <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release not\u2026",
+    "additions": 8,
+    "author": "jw9603",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #44423 `continuous_batching_chat_completion` was missing input preprocessing and `tokenize=True` in `apply_chat_template`, causing `'str' object has no attribute 'to'` for multimodal models. Added the same `get_model_modality` + `get\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44377",
-    "created_at": "2026-03-01T22:46:07Z",
-    "deletions": 10,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44436",
+    "created_at": "2026-03-04T15:26:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44377/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44377",
+    "files_url": "https://github.com/huggingface/transformers/pull/44436/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44436",
     "labels": [],
-    "merged": false,
-    "number": 44377,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Implement Tensor Parallelism (TP) support for compressed tensors quantization",
-    "updated_at": "2026-03-02T14:15:47Z"
+    "merged": true,
+    "number": 44436,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Fix continuous batching for multimodal models",
+    "updated_at": "2026-03-09T13:58:37Z"
   },
   {
-    "additions": 4,
-    "author": "stuckvgn",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary The quick-start code examples in `flan-t5.md` and `flan-ul2.md` use `\"A step by step recipe to make bolognese pasta:\"` as the demo prompt, with output that includes `ground beef`. This PR replaces it with `\"A step by step recipe\u2026",
-    "changed_files": 2,
+    "additions": 138,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "This PR adds the option to have a ContinuousBatchingManager not be destroyed after generation is over. This allows the user to re-use the manager without requiring him to know any other entry point for CB apart from `generate_batch` or the\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44376",
-    "created_at": "2026-03-01T17:41:48Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44435",
+    "created_at": "2026-03-04T14:17:08Z",
+    "deletions": 54,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44376/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44376",
+    "files_url": "https://github.com/huggingface/transformers/pull/44435/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44435",
     "labels": [],
-    "merged": false,
-    "number": 44376,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44435,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "docs: update Flan-T5 and Flan-UL2 example to use plant-based recipe prompt",
-    "updated_at": "2026-03-14T06:47:36Z"
+    "title": "[CB] Persistent manager",
+    "updated_at": "2026-03-26T22:02:28Z"
   },
   {
-    "additions": 6829,
-    "author": "NielsRogge",
+    "additions": 413,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds RF-DETR using Codex 5.3. It did everything: modular (in 600 lines of code), fast and slow image processors, conversion script with bells and whistles (setting `id2label` etc.) To do: - [x] verify loss c\u2026",
-    "changed_files": 21,
+    "body_excerpt": "This PR adds a dedicated config for continuous batching, which is starting to have a lot parameters. This will give the user a clear view of what is possible and make adding new parameters easier. No breaking changes through `account_for_c\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44375",
-    "created_at": "2026-03-01T17:32:17Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44434",
+    "created_at": "2026-03-04T13:49:05Z",
+    "deletions": 303,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44375/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44375",
+    "files_url": "https://github.com/huggingface/transformers/pull/44434/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44434",
     "labels": [],
-    "merged": false,
-    "number": 44375,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Add RF-DETR",
-    "updated_at": "2026-03-05T16:00:53Z"
+    "merged": true,
+    "number": 44434,
+    "review_comments_count": 12,
+    "state": "closed",
+    "title": "[CB] Add dedicated config",
+    "updated_at": "2026-03-13T13:56:40Z"
   },
   {
-    "additions": 4,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #44373 The `position_ids` parameter docstrings in `_get_unpad_data()` (line 360) and `_upad_input()` (line 413) in `src/transformers/modeling_flash_attention_utils.py` were incorrectly describing `attention_m\u2026",
-    "changed_files": 1,
+    "additions": 177,
+    "author": "leopold-tzafon",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Instead of silently failing when mm_token_type_ids is not passed, derives it in Qwen3 and Qwen3.5. Same as it was before: https://github.com/huggingface/transformers/commit/c281a2de8998e66e93fac30a236225528531df9b P\u2026",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44374",
-    "created_at": "2026-03-01T17:07:35Z",
-    "deletions": 2,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44433",
+    "created_at": "2026-03-04T13:46:14Z",
+    "deletions": 61,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44374/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44374",
+    "files_url": "https://github.com/huggingface/transformers/pull/44433/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44433",
     "labels": [],
-    "merged": false,
-    "number": 44374,
+    "merged": true,
+    "number": 44433,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix incorrect position_ids docstring in modeling_flash_attention_utils.py",
-    "updated_at": "2026-03-02T14:06:30Z"
+    "title": "fix: raise error if mm_token_type_ids not supplied ",
+    "updated_at": "2026-03-12T17:12:47Z"
   },
   {
-    "additions": 5,
-    "author": "N3u0ns",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44355 `inspect.getsource()` fails with `TypeError` when called on Cython-compiled functions or built-in functions that don't have Python source code. This adds a try/except block to gracefully handle this case by returnin\u2026",
-    "changed_files": 1,
+    "additions": 85,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, instead of having to divide image token by norm scale, we can do it same way as in other model (eg. gemma3) and add a custom embed layer. It should be 100% BC because users usually call `self.embed_tok\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44372",
-    "created_at": "2026-03-01T13:53:58Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44432",
+    "created_at": "2026-03-04T10:04:40Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44372/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44372",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44372,
+    "files_url": "https://github.com/huggingface/transformers/pull/44432/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44432",
+    "labels": [],
+    "merged": true,
+    "number": 44432,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: handle Cython-compiled functions in get_docstring_indentation_level",
-    "updated_at": "2026-03-02T13:39:50Z"
+    "title": "Make paligemma embed tokens standard",
+    "updated_at": "2026-03-11T08:38:41Z"
   },
   {
-    "additions": 12,
-    "author": "leaderofARS",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# Fix documentation inconsistencies in integrations folder ## Description This PR addresses documentation errors and inconsistencies across the integrations module, specifically clarifying terminology and deprecation status in two key inte\u2026",
-    "changed_files": 3,
+    "additions": 4094,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Re-opening back a PR on cleaning up clip-like model's backbones. Let's merge it now, I've been seeing quite a lot of ppl reporting it and I am not sure when it will be resolved by the big vision refactor Basically,\u2026",
+    "changed_files": 42,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44369",
-    "created_at": "2026-03-01T07:34:43Z",
-    "deletions": 9,
+    "comments_count": 25,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44431",
+    "created_at": "2026-03-04T10:02:13Z",
+    "deletions": 2220,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44369/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44369",
+    "files_url": "https://github.com/huggingface/transformers/pull/44431/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44431",
     "labels": [],
-    "merged": false,
-    "number": 44369,
-    "review_comments_count": 7,
-    "state": "open",
-    "title": "Feature/integrations docs fix",
-    "updated_at": "2026-03-06T19:47:39Z"
+    "merged": true,
+    "number": 44431,
+    "review_comments_count": 92,
+    "state": "closed",
+    "title": "Refactor CLIP-like models",
+    "updated_at": "2026-04-10T13:40:31Z"
   },
   {
-    "additions": 171,
-    "author": "jayakumarpujar",
+    "additions": 0,
+    "author": "Rohang2005",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #43701 Models with `_checkpoint_conversion_mapping` (e.g. VLMs like Qwen2.5VL, LLaVA, ColPali, etc.) use a key renaming system: - **Loading** (`from_pretrained`): Checkpoint keys are renamed from original format \u2192 model fo\u2026",
-    "changed_files": 3,
+    "body_excerpt": "## What does this PR do? This PR fixes an inconsistency in the AFMoE module where `past_key_values` was passed to a function argument expecting `past_key_value`. The function signature expects a singular cache object (`past_key_value`), bu\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44366",
-    "created_at": "2026-03-01T03:43:16Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44430",
+    "created_at": "2026-03-04T08:13:38Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44366/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44366",
+    "files_url": "https://github.com/huggingface/transformers/pull/44430/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44430",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44366,
+    "number": 44430,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix resume_from_checkpoint key mismatch for models with _checkpoint_conversion_mapping",
-    "updated_at": "2026-03-02T14:02:22Z"
+    "title": "Fix inconsistent past_key_value/past_key_values usage in AFMoE modeling",
+    "updated_at": "2026-03-04T14:07:32Z"
   },
   {
-    "additions": 8,
-    "author": "jayakumarpujar",
+    "additions": 14,
+    "author": "thakoreh",
     "author_association": "NONE",
-    "body_excerpt": "## Summary - Fixes #44360 - The reference DeepSeek-V3.2 `fp8_index` kernel applies **ReLU** to per-head q\u00b7k scores before weighting and summing across heads: ``` logits[i3_n, i_h] = T.max(logits[i3_n, i_h], 0) * q_s_frag[i_h] ``` [Referenc\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44360-6",
+    "body_excerpt": "## Summary Fixes #44336 The `loading_report` module was using `PALETTE['italic']` and `PALETTE['bold']` directly in string formatting, which caused ANSI escape codes to be emitted even when stdout is not connected to a terminal (e.g., when\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44336-7",
     "cluster_ids": [
-      "cluster-44360-6"
+      "cluster-44336-7"
     ],
     "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44364",
-    "created_at": "2026-03-01T02:19:14Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44429",
+    "created_at": "2026-03-04T07:47:02Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44364/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44364",
+    "files_url": "https://github.com/huggingface/transformers/pull/44429/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44429",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44364,
+    "number": 44429,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add missing ReLU in GlmMoeDsaIndexer scoring",
-    "updated_at": "2026-03-02T13:55:48Z"
+    "title": "Fix ANSI codes emitted in loading_report when stdout is not a TTY",
+    "updated_at": "2026-03-04T13:58:46Z"
   },
   {
-    "additions": 57,
-    "author": "jayakumarpujar",
+    "additions": 10,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@IlyasMoutawwakil pls help review, thx!",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44428",
+    "created_at": "2026-03-04T07:41:20Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44428/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44428",
+    "labels": [],
+    "merged": true,
+    "number": 44428,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Add XPU Expectations for vibe voice acoustic tokenizer tests",
+    "updated_at": "2026-04-02T03:21:38Z"
+  },
+  {
+    "additions": 43,
+    "author": "Jaredw2289-svg",
     "author_association": "NONE",
-    "body_excerpt": "## Summary - Fixes #44327 - `decode_spans()` in the QA pipeline crashes with `ValueError: kth(=N) out of bounds (N)` when `len(scores_flat) == topk` (e.g., `top_k=100` with `seq_len=10`, since `10\u00b2 = 100`) - Root cause: `np.argpartition(ar\u2026",
+    "body_excerpt": "Fixes #44297 ## Problem `tokenizer.save_pretrained()` overwrites `tokenizer_class` in `tokenizer_config.json` with the current wrapper class (e.g. `PreTrainedTokenizerFast`) instead of preserving the original class from the loaded config (\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44363",
-    "created_at": "2026-03-01T01:47:44Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44427",
+    "created_at": "2026-03-04T06:03:56Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44363/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44363",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44427/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44427",
+    "labels": [],
     "merged": false,
-    "number": 44363,
+    "number": 44427,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix off-by-one in decode_spans causing ValueError with np.argpartition",
-    "updated_at": "2026-03-02T13:03:02Z"
+    "title": "fix(tokenization): preserve original tokenizer_class in save_pretrained",
+    "updated_at": "2026-03-11T02:59:12Z"
   },
   {
-    "additions": 4,
-    "author": "harshaljanjani",
+    "additions": 29,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing Dia use case was identified and fixed in this PR: \u2192 [MIGRATION_GUIDE_V5.md](https://github.com/harshaljanjani/transformers/blob/main/MIGRATION_GUIDE_V5.md) states that v5 renamed `additional_\u2026",
+    "body_excerpt": "@IlyasMoutawwakil Can you help review? Thx!",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44362",
-    "created_at": "2026-02-28T20:04:05Z",
-    "deletions": 6,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44426",
+    "created_at": "2026-03-04T05:57:34Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44362/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44362",
+    "files_url": "https://github.com/huggingface/transformers/pull/44426/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44426",
     "labels": [],
     "merged": true,
-    "number": 44362,
-    "review_comments_count": 0,
+    "number": 44426,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "fix(tokenizer): Fix MLukeTokenizer AttributeError post-v5 refactor",
-    "updated_at": "2026-03-02T14:51:18Z"
+    "title": "update the expected output for qwen2_5_vl w/ pytorch 2.10 XPU",
+    "updated_at": "2026-04-09T02:32:41Z"
   },
   {
-    "additions": 341,
-    "author": "sxu75374",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds a workaround for the PyTorch MPS `sdpa_vector_2pass_mps` correctness bug ([pytorch/pytorch#174861](https://github.com/pytorch/pytorch/issues/174861)). **The problem:** On Apple Silicon with MPS backend, `F.scal\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "I believe the second `if` should be `elif` so the else branch only triggers when neither the string-truncation NOR the float-formatting conditions apply. Otherwise it overwrites the truncation message with the original long string.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44359",
-    "created_at": "2026-02-28T17:47:01Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44425",
+    "created_at": "2026-03-04T02:48:00Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44359/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44359",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44425/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44425",
+    "labels": [],
     "merged": false,
-    "number": 44359,
+    "number": 44425,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(sdpa): add workaround for MPS sdpa_vector_2pass_mps correctness bug",
-    "updated_at": "2026-03-02T13:54:58Z"
+    "state": "open",
+    "title": "Fix conditional check for float formatting",
+    "updated_at": "2026-03-04T02:48:41Z"
   },
   {
     "additions": 6,
-    "author": "hardikmeisheri",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "## Summary - `ShieldGemma2ForImageClassification` was missing `_tied_weights_keys`, so `model.lm_head.weight` was randomly re-initialized on every `from_pretrained` call instead of being tied to `embed_tokens.weight`. - This caused non-det\u2026",
+    "author": "jw9603",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes `AttributeError: 'str' object has no attribute 'to'` when using `transformers serve --continuous-batching` with multimodal models like Qwen3.5-9B. `processor.apply_chat_template()` returns a plain string (not\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 14,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44358",
-    "created_at": "2026-02-28T16:49:27Z",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44424",
+    "created_at": "2026-03-04T00:56:08Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44358/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44358",
+    "files_url": "https://github.com/huggingface/transformers/pull/44424/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44424",
     "labels": [],
-    "merged": true,
-    "number": 44358,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 44424,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix ShieldGemma2 non-reproducible outputs by adding _tied_weights_keys",
-    "updated_at": "2026-03-16T20:02:09Z"
+    "title": "Fix `transformers serve --continuous-batching` for multimodal models",
+    "updated_at": "2026-03-05T09:16:25Z"
   },
   {
-    "additions": 482,
-    "author": "NabilMch",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 127,
+    "additions": 117,
+    "author": "mitre88",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds a Spanish (es) translation of the `conversations.md` guide, which covers the fundamentals of using chat models in Transformers. ### Translated sections: - Chat CLI usage - TextGenerationPipeline in chat mode -\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44357",
-    "created_at": "2026-02-28T15:11:37Z",
-    "deletions": 489,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44422",
+    "created_at": "2026-03-04T00:42:43Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44357/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44357",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44357,
+    "files_url": "https://github.com/huggingface/transformers/pull/44422/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44422",
+    "labels": [],
+    "merged": true,
+    "number": 44422,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix RoPE inv_freq default initialization (Issue #39753)",
-    "updated_at": "2026-03-02T13:50:00Z"
+    "title": "docs: add Spanish translation for conversations.md (chat basics)",
+    "updated_at": "2026-03-04T16:45:24Z"
   },
   {
-    "additions": 6,
-    "author": "iamaber",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Checks if model is already in target dtype before casting to avoid redundant copies that cause 25% performance degradation with `--fp16_full_eval`. ## Changes - Added dtype check before casting model to fp16/bf16 in `evaluation_\u2026",
-    "changed_files": 1,
+    "additions": 309,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? When we shard weights according to a TP plan, we do not update the corresponding parent module attributes. For instance if we shard the weight of a `torch.nn.Linear`, we should also update its `in_features` or `out_\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44356",
-    "created_at": "2026-02-28T14:24:32Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44421",
+    "created_at": "2026-03-03T22:51:47Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44356/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44356",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44356,
+    "files_url": "https://github.com/huggingface/transformers/pull/44421/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44421",
+    "labels": [],
+    "merged": true,
+    "number": 44421,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: avoid redundant fp16/bf16 model casts in evaluation_loop",
-    "updated_at": "2026-03-02T13:59:38Z"
+    "title": "Update parent module attributes when sharding with TP",
+    "updated_at": "2026-03-05T23:32:06Z"
   },
   {
-    "additions": 73,
-    "author": "sxu75374",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes an off-by-one error in `decode_spans()` where `np.argpartition` is called with `kth == len(arr)` when `topk` equals the number of candidate scores. This raises `ValueError: kth(=N) out of bounds (N)`. **Root\u2026",
-    "changed_files": 2,
+    "additions": 249,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "- removes \"Number of accelerators\" section from \"Accelerator selection\" guide since this is probably pretty commonly known - add a new \"DDP\" guide - refactored \"Accelerate\" guide with a more focused overview of what it is and how to config\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44354",
-    "created_at": "2026-02-28T08:46:39Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44420",
+    "created_at": "2026-03-03T22:41:59Z",
+    "deletions": 250,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44354/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44354",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44420/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44420",
+    "labels": [],
     "merged": false,
-    "number": 44354,
+    "number": 44420,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: off-by-one in decode_spans causes ValueError when topk == len(scores)",
-    "updated_at": "2026-03-02T13:02:38Z"
+    "state": "open",
+    "title": "[docs] distributed training",
+    "updated_at": "2026-03-11T17:36:12Z"
   },
   {
-    "additions": 50,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil , pls help review, thx!",
+    "additions": 6,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? To be merged after #44302 and https://github.com/huggingface/kernels/pull/285. It adds the `neuron` device in checks for custom kernels, enabling to load kernels for Neuron devices.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44353",
-    "created_at": "2026-02-28T07:50:37Z",
-    "deletions": 2,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44417",
+    "created_at": "2026-03-03T20:15:26Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44353/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44353",
+    "files_url": "https://github.com/huggingface/transformers/pull/44417/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44417",
     "labels": [],
     "merged": true,
-    "number": 44353,
-    "review_comments_count": 1,
+    "number": 44417,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "add expectations for xpu for olmo_hybrid model",
-    "updated_at": "2026-04-02T03:22:06Z"
+    "title": "Neuron kernels integration",
+    "updated_at": "2026-03-05T17:09:39Z"
   },
   {
-    "additions": 13,
-    "author": "giulio-leone",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44336 The `loading_report` module emitted **bold/italic ANSI escape codes** even when `stdout` was not connected to a terminal (e.g. piped or redirected output). While `_color()` already gated color codes behind `sys.stdo\u2026",
+    "additions": 1,
+    "author": "tyler-romero",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Register `olmo_hybrid` in `TOKENIZER_MAPPING_NAMES` so auto-tokenizer resolution works, matching the other auto-registrations already in place for this model.",
     "changed_files": 1,
-    "cluster_id": "cluster-44336-7",
-    "cluster_ids": [
-      "cluster-44336-7"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44352",
-    "created_at": "2026-02-28T06:22:19Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44352/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44352",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44352,
-    "review_comments_count": 0,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44416",
+    "created_at": "2026-03-03T19:30:56Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44416/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44416",
+    "labels": [],
+    "merged": true,
+    "number": 44416,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "fix: suppress ANSI escape codes when stdout is not a terminal",
-    "updated_at": "2026-03-02T13:59:15Z"
+    "title": "[tiny] Add olmo_hybrid to tokenizer auto-mapping",
+    "updated_at": "2026-03-04T19:26:10Z"
   },
   {
-    "additions": 7,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
-    "changed_files": 2,
+    "additions": 2,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR removes @MekkCyber from the PR template. cc @Rocketknight1 you only need to ping me now ;)",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44350",
-    "created_at": "2026-02-28T03:20:47Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44415",
+    "created_at": "2026-03-03T16:59:08Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44350/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44350",
+    "files_url": "https://github.com/huggingface/transformers/pull/44415/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44415",
     "labels": [],
     "merged": true,
-    "number": 44350,
-    "review_comments_count": 6,
+    "number": 44415,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "skip 1 invalid test case for higgs_audio_v2",
-    "updated_at": "2026-04-09T02:32:42Z"
+    "title": "Update PR template",
+    "updated_at": "2026-03-04T14:13:04Z"
   },
   {
-    "additions": 49,
-    "author": "zzc0430",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? > Inspired by https://github.com/huggingface/transformers/pull/44347#issuecomment-3976028358 Fixes `transformers serve` failing with hybrid models like Qwen3.5 that use `linear_attention` layers. Two issues are addr\u2026",
-    "changed_files": 3,
+    "additions": 35,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44303 - see also comments here https://github.com/huggingface/transformers/pull/44316#issuecomment-3984362089. Supersedes https://github.com/huggingface/trans\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44349",
-    "created_at": "2026-02-28T03:09:30Z",
-    "deletions": 3,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44414",
+    "created_at": "2026-03-03T16:47:47Z",
+    "deletions": 39,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44349/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44349",
+    "files_url": "https://github.com/huggingface/transformers/pull/44414/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44414",
     "labels": [],
-    "merged": false,
-    "number": 44349,
+    "merged": true,
+    "number": 44414,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: support linear_attention in continuous batching and fix serve ch\u2026",
-    "updated_at": "2026-03-02T13:48:04Z"
+    "title": "Reduce tqdm verbosity during model loading",
+    "updated_at": "2026-03-03T16:57:56Z"
   },
   {
-    "additions": 341,
-    "author": "n0kovo",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Most quantized models for Apple Silicon on the Hub are in MLX format. The `MetalConfig` quantization backend supports on-the-fly quantization of standard checkpoints but cannot load pre-quantized MLX models. This PR fixes the fi\u2026",
-    "changed_files": 4,
+    "additions": 4,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44348",
-    "created_at": "2026-02-28T00:24:32Z",
-    "deletions": 32,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44413",
+    "created_at": "2026-03-03T16:24:43Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44348/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44348",
+    "files_url": "https://github.com/huggingface/transformers/pull/44413/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44413",
     "labels": [],
-    "merged": false,
-    "number": 44348,
+    "merged": true,
+    "number": 44413,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Enable MetalConfig to load pre-quantized MLX models from HuggingFace Hub",
-    "updated_at": "2026-03-02T17:18:46Z"
+    "state": "closed",
+    "title": "Fix peft conversion mappings",
+    "updated_at": "2026-03-03T17:08:39Z"
   },
   {
-    "additions": 49,
-    "author": "sxu75374",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes `AttributeError` when using continuous batching with composite model configs (e.g. `Qwen3_5Config` for vision-language models). Composite configs store attributes like `num_attention_heads` and `num_key_value\u2026",
-    "changed_files": 2,
+    "additions": 138,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Extends type checking to `src/transformers/quantizers`",
+    "changed_files": 28,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44347",
-    "created_at": "2026-02-27T22:48:49Z",
-    "deletions": 8,
+    "comments_count": 25,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44412",
+    "created_at": "2026-03-03T14:53:31Z",
+    "deletions": 74,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44347/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44347",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44347,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44412/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44412",
+    "labels": [],
+    "merged": true,
+    "number": 44412,
+    "review_comments_count": 33,
     "state": "closed",
-    "title": "fix: resolve composite config in PagedAttentionCache and group_layers_by_attn_type",
-    "updated_at": "2026-03-02T13:41:23Z"
+    "title": "chore(typing): Add type checking to `src/transformers/quantizers`",
+    "updated_at": "2026-03-11T11:24:11Z"
   },
   {
-    "additions": 4,
-    "author": "sxu75374",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes the `q_a_layernorm` and `kv_a_layernorm` in DeepSeek V2/V3 MLA attention to explicitly receive `config.rms_norm_eps` instead of falling back to the RMSNorm class default (`1e-6`). **The problem:** All other RM\u2026",
-    "changed_files": 2,
+    "additions": 59,
+    "author": "burtenshaw",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR updates `AGENTS.md` to discourage duplicated and trivial work by agents. - CLAUDE.md-> AGENTS.md - ssue-thread coordination before PRs - mandatory duplicate-PR checks with gh commands - no one-off busywork P\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44346",
-    "created_at": "2026-02-27T21:47:45Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44411",
+    "created_at": "2026-03-03T11:35:13Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44346/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44346",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44346,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44411/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44411",
+    "labels": [],
+    "merged": true,
+    "number": 44411,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "fix(deepseek): pass config.rms_norm_eps to MLA q/kv layernorms",
-    "updated_at": "2026-03-02T13:26:21Z"
+    "title": "Update agentic contributions guidelines in AGENTS.md to force yielding.",
+    "updated_at": "2026-03-12T09:28:45Z"
   },
   {
-    "additions": 13,
-    "author": "manavshrivastavagit",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44303 When redirecting `from_pretrained` output to a log file (e.g. in CI), the \"Loading weights\" tqdm bar was updating its postfix with `Materializing param=...` on every parameter, producing huge log files. ## Change -\u2026",
+    "additions": 38,
+    "author": "zvik",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? This PR allows the Granite-speech model to use hidden states from the encoder hidden layers. This is an internal model option that is required for the next generation of Granite-speech models. ## Changes: - New conf\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44345",
-    "created_at": "2026-02-27T21:05:22Z",
-    "deletions": 3,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44408",
+    "created_at": "2026-03-03T07:50:39Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44345/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44345",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44408/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44408",
+    "labels": [],
     "merged": false,
-    "number": 44345,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Less verbose weight-loading tqdm when stdout is not a TTY (fixes #44303)",
-    "updated_at": "2026-03-02T13:49:11Z"
+    "number": 44408,
+    "review_comments_count": 9,
+    "state": "open",
+    "title": "Add option to export  encoder hidden states for Granite-speech",
+    "updated_at": "2026-04-14T13:48:55Z"
   },
   {
-    "additions": 6,
-    "author": "manavshrivastavagit",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44297 Qwen3.5 models on the Hub (e.g. [Qwen/Qwen3.5-27B](https://huggingface.co/Qwen/Qwen3.5-27B)) use `\"tokenizer_class\": \"Qwen2Tokenizer\"` in `tokenizer_config.json`, but `TOKENIZER_MAPPING_NAMES` had `qwen3_5` \u2192 `\"Qwen\u2026",
+    "additions": 23,
+    "author": "hongping-zh",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Adds an \"Energy Efficiency Considerations\" section to the bitsandbytes quantization documentation, providing practical guidance on the energy implications of different quantization configurations. ## Motivation This addresses th\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44344",
-    "created_at": "2026-02-27T21:04:27Z",
-    "deletions": 2,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44407",
+    "created_at": "2026-03-03T04:42:57Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44344/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44344",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44407/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44407",
+    "labels": [],
     "merged": false,
-    "number": 44344,
+    "number": 44407,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix tokenizer_class in tokenizer_config.json for Qwen3.5 save_pretrained (fixes #44297)",
-    "updated_at": "2026-03-02T13:17:41Z"
+    "state": "open",
+    "title": "docs: add energy efficiency considerations to bitsandbytes quantization guide",
+    "updated_at": "2026-03-25T11:53:49Z"
   },
   {
-    "additions": 16,
-    "author": "manavshrivastavagit",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44336 `utils/loading_report.py` was emitting ANSI codes for **bold** and *italic* via `PALETTE['bold']` and `PALETTE['italic']` without checking if stdout is connected to a terminal. `_color()` already respects `sys.stdou\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44336-7",
-    "cluster_ids": [
-      "cluster-44336-7"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44343",
-    "created_at": "2026-02-27T20:58:33Z",
-    "deletions": 9,
+    "additions": 21,
+    "author": "medhakimbedhief",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Adds sequence-classification support for Qwen3.5 in AutoModelForSequenceClassification. **What does this PR do?** This PR enables loading Qwen3.5 checkpoints with `AutoModelForSequenceClassification`, which previously failed with: `ValueEr\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44406",
+    "created_at": "2026-03-03T03:44:37Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44343/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44343",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44343,
+    "files_url": "https://github.com/huggingface/transformers/pull/44406/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44406",
+    "labels": [],
+    "merged": true,
+    "number": 44406,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix ANSI codes in loading_report when stdout is not a TTY (fixes #44336)",
-    "updated_at": "2026-03-02T13:44:43Z"
+    "title": "Add Qwen3.5 support for sequence classification",
+    "updated_at": "2026-04-08T11:33:19Z"
   },
   {
-    "additions": 384,
-    "author": "stevhliu",
+    "additions": 0,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "- created a new performance section divided into memory and speed optimizations - model memory training anatomy [guide](https://huggingface.co/docs/transformers/main/en/model_memory_anatomy) is now the more descriptive and simplified GPU m\u2026",
-    "changed_files": 9,
+    "body_excerpt": "Some generate tests have a ~1% chance of generating short outputs because they hit an EOS token early, which causes the test to flake because it asserts the output shape. This PR enforces `min_length` so that doesn't happen!",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44342",
-    "created_at": "2026-02-27T20:10:49Z",
-    "deletions": 274,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44401",
+    "created_at": "2026-03-02T18:26:56Z",
+    "deletions": 77,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44342/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44342",
+    "files_url": "https://github.com/huggingface/transformers/pull/44401/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44401",
     "labels": [],
     "merged": true,
-    "number": 44342,
-    "review_comments_count": 12,
+    "number": 44401,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "[docs] training performance",
-    "updated_at": "2026-04-09T20:43:32Z"
+    "title": "Enforce min length in some generate tests",
+    "updated_at": "2026-03-04T14:05:50Z"
   },
   {
-    "additions": 12,
+    "additions": 14,
     "author": "Kokonico",
     "author_association": "NONE",
-    "body_excerpt": "Fixes #44336 ## Changes * Added a new `_palette` function to return the ANSI code for a given color or format only if `sys.stdout` is interactive. (`src/transformers/utils/loading_report.py`) * Updated all usages of `PALETTE[<format>]` in\u2026",
+    "body_excerpt": "# What does this PR do? As per title, confirms interactive terminal before adding formatting to loading_report output. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear\u2026",
     "changed_files": 1,
     "cluster_id": "cluster-44336-7",
     "cluster_ids": [
       "cluster-44336-7"
     ],
-    "cluster_role": "canonical",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44341",
-    "created_at": "2026-02-27T19:30:30Z",
-    "deletions": 10,
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44400",
+    "created_at": "2026-03-02T18:21:48Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44341/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44341",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44400/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44400",
+    "labels": [],
     "merged": false,
-    "number": 44341,
-    "review_comments_count": 2,
+    "number": 44400,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix and optimize ANSI color handling in loading report for interactive terminals",
-    "updated_at": "2026-03-02T18:16:00Z"
+    "title": "fix: interactive terminal checks for formatting in loading_report.py",
+    "updated_at": "2026-03-09T14:46:29Z"
   },
   {
-    "additions": 33,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Some speculative tests seem flaky with SDPA but reliable with `eager` attention. In local testing, `test_speculative_decoding_equals_regular_decoding` fails 5-10% of the time without this change. and I also saw CI failures. Failures are re\u2026",
-    "changed_files": 1,
+    "additions": 3,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #44384 In `Qwen3_5TextModel.forward`, after splitting `position_ids` into `text_position_ids` (index 0, for text) and `position_ids` (indices 1:, for temporal/height/width), the decoder layer call incorrectly passed `posit\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44340",
-    "created_at": "2026-02-27T18:09:09Z",
-    "deletions": 27,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44399",
+    "created_at": "2026-03-02T17:28:59Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44340/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44340",
+    "files_url": "https://github.com/huggingface/transformers/pull/44399/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44399",
     "labels": [],
     "merged": true,
-    "number": 44340,
-    "review_comments_count": 6,
+    "number": 44399,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix speculative tests that are flaky with SDPA",
-    "updated_at": "2026-03-02T17:18:27Z"
+    "title": "Fix position_ids typo in Qwen3_5TextModel forward pass",
+    "updated_at": "2026-03-06T01:48:22Z"
   },
   {
-    "additions": 6221,
-    "author": "harshaljanjani",
+    "additions": 4,
+    "author": "weiguangli-io",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? \u2192 This PR adds **DEIMv2** to Transformers! \u2192 **IMP:** I've linked two notebooks: a [Colab notebook here](https://colab.research.google.com/drive/1jCNefxrKiHWdBEIYTcU3jsd9xyWDwIxC?usp=sharing) demonstrating the fun\u2026",
-    "changed_files": 16,
-    "cluster_id": "cluster-41211-3",
+    "body_excerpt": "## What does this PR do? Adds the missing ReLU activation in `GlmMoeDsaIndexer.forward()` on per-head q\u00b7k scores before the weighted sum across heads. The reference DeepSeek-V3.2 `fp8_index` kernel applies `T.max(logits, 0)` (i.e., ReLU) a\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44360-6",
     "cluster_ids": [
-      "cluster-41211-3"
+      "cluster-44360-6"
     ],
-    "cluster_role": "canonical",
-    "comments_count": 19,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44339",
-    "created_at": "2026-02-27T18:08:53Z",
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44398",
+    "created_at": "2026-03-02T16:41:01Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44339/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44339",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44398/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44398",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44339,
-    "review_comments_count": 199,
-    "state": "open",
-    "title": "model: Add DEIMv2 to Transformers",
-    "updated_at": "2026-04-02T19:53:45Z"
+    "number": 44398,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add missing ReLU in GlmMoeDsaIndexer scoring",
+    "updated_at": "2026-03-04T13:54:20Z"
   },
   {
-    "additions": 3641,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR rework a bit how distributed tests are tested. I tried to keep some of the existing tests and added new tests also. For each of these distributed methods (ddp, fsdp, deepspeeed), we have some common tests li\u2026",
-    "changed_files": 38,
+    "additions": 86,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #44380 `GPT2Attention.forward()` did not pass the `scaling` parameter to `attention_interface`, causing `scale_attn_weights` and `scale_attn_by_inverse_layer_idx` config options to be silently ignored when usi\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44338",
-    "created_at": "2026-02-27T17:50:16Z",
-    "deletions": 3762,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44397",
+    "created_at": "2026-03-02T16:14:37Z",
+    "deletions": 50,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44338/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44338",
+    "files_url": "https://github.com/huggingface/transformers/pull/44397/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44397",
     "labels": [],
     "merged": true,
-    "number": 44338,
-    "review_comments_count": 0,
+    "number": 44397,
+    "review_comments_count": 17,
     "state": "closed",
-    "title": "Update distributed tests",
-    "updated_at": "2026-03-05T23:35:36Z"
+    "title": "Fix GPT2 attention scaling ignored in SDPA/FlashAttention",
+    "updated_at": "2026-03-04T16:47:42Z"
   },
   {
-    "additions": 2,
-    "author": "stevhliu",
+    "additions": 3,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "- moves `kernels-community/flash-attn2:FlashAttention2` to `from_pretrained(attn_implementation...)` - fix error message for registering a kernel",
+    "body_excerpt": "# What does this PR do? Some renaming should just never be applied when the weight format already matches. (this is actually regardless of remote code). This allows us to remove 1 test added in https://github.com/huggingface/transformers/c\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44337",
-    "created_at": "2026-02-27T17:36:54Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44396",
+    "created_at": "2026-03-02T15:50:27Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44337/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44337",
+    "files_url": "https://github.com/huggingface/transformers/pull/44396/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44396",
     "labels": [],
     "merged": true,
-    "number": 44337,
-    "review_comments_count": 0,
+    "number": 44396,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[docs] kernelconfig fix",
-    "updated_at": "2026-02-27T22:46:30Z"
+    "title": "[ Dynamic weight loader] fix remote code when format matches",
+    "updated_at": "2026-03-03T17:53:39Z"
   },
   {
-    "additions": 57,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the RoFormer model to use the `@capture_outputs` and `@can_return_tuple` decorators, following the established pattern (similar to #44047 for Bloom, #44151 for BioGPT, etc.). ### Changes: - **`RoFormerMod\u2026",
-    "changed_files": 1,
+    "additions": 153,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title!",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44335",
-    "created_at": "2026-02-27T17:23:01Z",
-    "deletions": 172,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44395",
+    "created_at": "2026-03-02T14:52:12Z",
+    "deletions": 31,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44335/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44335",
+    "files_url": "https://github.com/huggingface/transformers/pull/44395/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44395",
+    "labels": [],
+    "merged": true,
+    "number": 44395,
+    "review_comments_count": 22,
+    "state": "closed",
+    "title": "Fix kernels security issue",
+    "updated_at": "2026-03-03T15:28:20Z"
+  },
+  {
+    "additions": 4742,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? #43514 precedes this PR",
+    "changed_files": 61,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44394",
+    "created_at": "2026-03-02T14:49:05Z",
+    "deletions": 7232,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44394/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44394",
     "labels": [],
     "merged": false,
-    "number": 44335,
+    "number": 44394,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor RoFormer output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:29:59Z"
+    "state": "open",
+    "title": "\ud83d\udea8\ud83d\udea7 FeatureExtractor \u2192 AudioProcessor",
+    "updated_at": "2026-04-03T22:16:41Z"
   },
   {
-    "additions": 1,
-    "author": "NielsRogge",
+    "additions": 61,
+    "author": "ebezzam",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? I had some issues with running `transformers-cli add-new-model-like`. This PR fixes it. Fixes #44661.",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? The acoustic tokenizer was updated to use `VoxtralRealtimeConv1dPaddingCache` in #43625 but the ASR model wasn't updated.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44334",
-    "created_at": "2026-02-27T17:13:44Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44392",
+    "created_at": "2026-03-02T13:41:19Z",
+    "deletions": 100,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44334/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44334",
+    "files_url": "https://github.com/huggingface/transformers/pull/44392/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44392",
     "labels": [],
     "merged": true,
-    "number": 44334,
+    "number": 44392,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix CookieCutter",
-    "updated_at": "2026-03-13T17:13:28Z"
+    "title": "[VibeVoice ASR] Use updated padding cache for ASR model.",
+    "updated_at": "2026-03-02T13:51:02Z"
   },
   {
-    "additions": 13,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the ALBERT model to use named attribute access instead of index-based access on model outputs, and removes redundant `return_dict=True` arguments from inner model calls (already handled by `@capture_outpu\u2026",
-    "changed_files": 1,
+    "additions": 3040,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Tokenizer was updated to Voxtral cache object in #43625, but forgot to update that of the ASR model",
+    "changed_files": 30,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44333",
-    "created_at": "2026-02-27T17:12:25Z",
-    "deletions": 18,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44391",
+    "created_at": "2026-03-02T13:29:59Z",
+    "deletions": 311,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44333/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44333",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44391/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44391",
+    "labels": [],
     "merged": false,
-    "number": 44333,
+    "number": 44391,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor ALBERT to use named attributes and remove redundant return_dict=True",
-    "updated_at": "2026-03-02T13:05:54Z"
+    "title": "[VibeVoice ASR] Use newer cache object for modular",
+    "updated_at": "2026-03-02T13:34:23Z"
   },
   {
-    "additions": 3,
-    "author": "tysoncung",
+    "additions": 3330,
+    "author": "liding-nv",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fix minor typos found in comments and docstrings: - `orignal` \u2192 `original` in `src/transformers/integrations/peft.py` (lines 245, 284) - Duplicate word `is is` \u2192 `is` in `src/transformers/models/dia/processing_dia.py` (line 89) Small clean\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44332",
-    "created_at": "2026-02-27T16:11:46Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44390",
+    "created_at": "2026-03-02T13:22:21Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44332/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44332",
+    "files_url": "https://github.com/huggingface/transformers/pull/44390/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44390",
     "labels": [],
     "merged": true,
-    "number": 44332,
-    "review_comments_count": 0,
+    "number": 44390,
+    "review_comments_count": 20,
     "state": "closed",
-    "title": "Fix typos in comments and docstrings",
-    "updated_at": "2026-02-27T18:02:59Z"
+    "title": "add support for nemotron_3",
+    "updated_at": "2026-03-03T18:18:50Z"
   },
   {
-    "additions": 33,
-    "author": "kashif",
+    "additions": 5,
+    "author": "Abdennacer-Badaoui",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? fixed the bfloat16 dtype mismatch and Loss computation shape mismatch. Also added tests for these. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to ap\u2026",
-    "changed_files": 3,
+    "body_excerpt": "Adds explicit `timm` installation to the AMD ROCm Docker image. This causes ~200 test failures in AMD CI (e.g., [gemma3n vision tests](https://github.com/huggingface/transformers/actions/runs/22474359922/job/65104428291)). This mirrors wha\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44331",
-    "created_at": "2026-02-27T15:46:08Z",
-    "deletions": 10,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44389",
+    "created_at": "2026-03-02T13:09:55Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44331/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44331",
+    "files_url": "https://github.com/huggingface/transformers/pull/44389/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44389",
     "labels": [],
     "merged": true,
-    "number": 44331,
-    "review_comments_count": 9,
+    "number": 44389,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[timesfm2_5] fix timesfm2.5 loss",
-    "updated_at": "2026-03-03T17:22:56Z"
+    "title": "[AMD CI] Add missing timm dependency to ROCm Docker image",
+    "updated_at": "2026-03-03T12:00:19Z"
   },
   {
-    "additions": 289,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title! Follow-up of https://github.com/huggingface/transformers/pull/44181 with more models!",
-    "changed_files": 136,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44330",
-    "created_at": "2026-02-27T15:33:02Z",
-    "deletions": 1682,
+    "additions": 0,
+    "author": "sahilmaniyar888",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes #44336 ### Summary This PR prevents ANSI style escape sequences from being emitted by `loading_report` when stdout is non-interactive (for example, redirected logs/files). ### Changes - Added a small helper `_\u2026",
+    "changed_files": 0,
+    "cluster_id": "cluster-44336-7",
+    "cluster_ids": [
+      "cluster-44336-7"
+    ],
+    "cluster_role": "member",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44388",
+    "created_at": "2026-03-02T11:40:49Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44330/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44330",
-    "labels": [],
-    "merged": true,
-    "number": 44330,
-    "review_comments_count": 13,
+    "files_url": "https://github.com/huggingface/transformers/pull/44388/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44388",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44388,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove `cache_position` in more models",
-    "updated_at": "2026-03-11T14:47:50Z"
+    "title": "Fix loading report ANSI styles for non-TTY output",
+    "updated_at": "2026-03-11T06:29:31Z"
   },
   {
-    "additions": 3,
-    "author": "linfeng-du",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "additions": 62,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44329",
-    "created_at": "2026-02-27T15:27:39Z",
-    "deletions": 3,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44386",
+    "created_at": "2026-03-02T10:43:13Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44329/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44329",
+    "files_url": "https://github.com/huggingface/transformers/pull/44386/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44386",
     "labels": [],
     "merged": true,
-    "number": 44329,
-    "review_comments_count": 4,
+    "number": 44386,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Enable Liger Kernel when doing hyperparameter search.",
-    "updated_at": "2026-03-03T13:44:56Z"
+    "title": "[higgs-audio-v2] fix sampling",
+    "updated_at": "2026-03-02T13:06:23Z"
   },
   {
-    "additions": 92,
-    "author": "SunMarc",
+    "additions": 8,
+    "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds guidelines for agents when it comes to add/run trainer tests. This needs to be updated as we modify, refactor the code !",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? I wasn't able to run make check-repo locally successfully, unless the following 3 fixes were applied.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44328",
-    "created_at": "2026-02-27T15:17:24Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44385",
+    "created_at": "2026-03-02T09:45:15Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44328/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44328",
+    "files_url": "https://github.com/huggingface/transformers/pull/44385/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44385",
     "labels": [],
-    "merged": true,
-    "number": 44328,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Add testing guide for agents for trainer tests",
-    "updated_at": "2026-02-27T17:32:11Z"
+    "merged": false,
+    "number": 44385,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix make check-repo",
+    "updated_at": "2026-03-02T09:54:23Z"
   },
   {
-    "additions": 38,
-    "author": "overcastbulb",
-    "author_association": "NONE",
-    "body_excerpt": "Adds missing pipeline tutorial example for zero-shot-classification following the existing format of other task examples in pipeline_tutorial.md. Related: #18926",
-    "changed_files": 1,
+    "additions": 4,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix the attr `_no_split_modules` of `Qwen3_5Model` and `Qwen3_5MoeModel`, which affect the FSDP init of hf Trainer.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44326",
-    "created_at": "2026-02-27T14:37:19Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44382",
+    "created_at": "2026-03-02T05:42:48Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44326/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44326",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44326,
+    "files_url": "https://github.com/huggingface/transformers/pull/44382/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44382",
+    "labels": [],
+    "merged": true,
+    "number": 44382,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: Add zero-shot-classification example to pipeline tutorial",
-    "updated_at": "2026-02-27T14:46:24Z"
+    "title": "[Bugfix] fix qwen3.5 no split module",
+    "updated_at": "2026-03-02T16:17:22Z"
   },
   {
-    "additions": 4,
-    "author": "kashif",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The `CLIPMLP` has the bias set to True but timesFM 2.5 uses `bias=False` in the pretrained model <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appe\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "carcel-yu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? This PR adds MLU support to `is_torch_bf16_gpu_available()` by checking `torch.mlu.is_bf16_supported()` when an MLU device is available. ### Why is this needed? MLU devices support bf16 training, but they are curr\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44325",
-    "created_at": "2026-02-27T13:18:40Z",
-    "deletions": 2,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44381",
+    "created_at": "2026-03-02T05:34:49Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44325/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44325",
+    "files_url": "https://github.com/huggingface/transformers/pull/44381/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44381",
     "labels": [],
     "merged": true,
-    "number": 44325,
+    "number": 44381,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[timesfm2_5] fix timesfm mlp bias",
-    "updated_at": "2026-02-27T13:36:13Z"
+    "title": "Add MLU bf16 support to is_torch_bf16_gpu_available",
+    "updated_at": "2026-03-06T14:34:30Z"
   },
   {
-    "additions": 16,
-    "author": "tonglei19961121",
-    "author_association": "NONE",
-    "body_excerpt": "Add tutorial example for DocumentQuestionAnswering pipeline following the existing format of other task examples. This PR addresses the first TODO item in #18926. Changes: - Added document-question-answering task example to pipeline_tutori\u2026",
-    "changed_files": 2,
+    "additions": 579,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "# Summary ! This PR is in draft, waiting for https://github.com/huggingface/transformers/pull/44227 to be merged This PR adds support for the `flash_attention_with_kvcache` kernel in continuoys batching. This is very efficient for decode-o\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44324",
-    "created_at": "2026-02-27T12:29:06Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44379",
+    "created_at": "2026-03-01T23:13:17Z",
+    "deletions": 235,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44324/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44324",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44324,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44379/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44379",
+    "labels": [],
+    "merged": true,
+    "number": 44379,
+    "review_comments_count": 19,
     "state": "closed",
-    "title": "docs: Add document-question-answering example to pipeline tutorial",
-    "updated_at": "2026-02-27T14:35:23Z"
+    "title": "[CB] Add paged_attention kernel",
+    "updated_at": "2026-03-09T22:16:31Z"
   },
   {
-    "additions": 16,
-    "author": "tonglei19961121",
-    "author_association": "NONE",
-    "body_excerpt": "Add tutorial example for DocumentQuestionAnswering pipeline following the existing format of other task examples. Fixes #18926",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "redpanda1995",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44323",
-    "created_at": "2026-02-27T12:26:00Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44378",
+    "created_at": "2026-03-01T22:57:50Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44323/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44323",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44378/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44378",
+    "labels": [],
     "merged": false,
-    "number": 44323,
+    "number": 44378,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: Add document-question-answering example to pipeline tutorial",
-    "updated_at": "2026-02-27T14:38:43Z"
+    "title": "Fix logging with each layer with ms-swift lora fine-tuning ",
+    "updated_at": "2026-03-02T14:18:22Z"
   },
   {
-    "additions": 12,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@vasqu This PR skipped 2 invalid test cases: ``` tests/models/voxtral_realtime/test_modeling_voxtral_realtime.py::VoxtralRealtimeForConditionalGenerationModelTest::test_generate_with_quant_cache tests/models/voxtral_realtime/test_modeling_\u2026",
+    "additions": 175,
+    "author": "redpanda1995",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes TODO: Implement proper TP support for compressed tensors quantization <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release not\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44321",
-    "created_at": "2026-02-27T09:54:14Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44377",
+    "created_at": "2026-03-01T22:46:07Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44321/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44321",
+    "files_url": "https://github.com/huggingface/transformers/pull/44377/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44377",
     "labels": [],
-    "merged": true,
-    "number": 44321,
+    "merged": false,
+    "number": 44377,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "skip 2 invalid test cases for voxtral_realtime model",
-    "updated_at": "2026-04-02T03:22:04Z"
+    "title": "Implement Tensor Parallelism (TP) support for compressed tensors quantization",
+    "updated_at": "2026-03-02T14:15:47Z"
   },
   {
-    "additions": 5273,
-    "author": "NielsRogge",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds [SAM3-LiteText: An Anatomical Study of the SAM3 Text Encoder for Efficient Vision-Language Segmentation](https://huggingface.co/papers/2602.12173). Fixes #44205",
-    "changed_files": 22,
+    "additions": 4,
+    "author": "stuckvgn",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary The quick-start code examples in `flan-t5.md` and `flan-ul2.md` use `\"A step by step recipe to make bolognese pasta:\"` as the demo prompt, with output that includes `ground beef`. This PR replaces it with `\"A step by step recipe\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44320",
-    "created_at": "2026-02-27T08:29:00Z",
-    "deletions": 6,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44376",
+    "created_at": "2026-03-01T17:41:48Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44320/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44320",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44376/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44376",
+    "labels": [],
     "merged": false,
-    "number": 44320,
-    "review_comments_count": 53,
-    "state": "open",
-    "title": "Add SAM3-LiteText",
-    "updated_at": "2026-04-12T11:23:39Z"
+    "number": 44376,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: update Flan-T5 and Flan-UL2 example to use plant-based recipe prompt",
+    "updated_at": "2026-03-14T06:47:36Z"
   },
   {
-    "additions": 74,
-    "author": "IlyasMoutawwakil",
+    "additions": 6829,
+    "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? This PR adds RF-DETR using Codex 5.3. It did everything: modular (in 600 lines of code), fast and slow image processors, conversion script with bells and whistles (setting `id2label` etc.) To do: - [x] verify loss c\u2026",
+    "changed_files": 21,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44319",
-    "created_at": "2026-02-27T08:20:45Z",
-    "deletions": 56,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44375",
+    "created_at": "2026-03-01T17:32:17Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44319/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44319",
+    "files_url": "https://github.com/huggingface/transformers/pull/44375/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44375",
     "labels": [],
-    "merged": true,
-    "number": 44319,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "Support non-gated experts",
-    "updated_at": "2026-03-02T19:26:38Z"
+    "merged": false,
+    "number": 44375,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add RF-DETR",
+    "updated_at": "2026-03-05T16:00:53Z"
   },
   {
-    "additions": 10,
-    "author": "yoginlangalia",
+    "additions": 4,
+    "author": "ManasVardhan",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Adds input validation for bounding box shape in `LayoutLMv3Tokenizer`. When users pass boxes with fewer (or more) than 4 values per box, the tokenizer now raises a clear `ValueError` instead of a confusing generic\u2026",
+    "body_excerpt": "## What does this PR do? Fixes #44373 The `position_ids` parameter docstrings in `_get_unpad_data()` (line 360) and `_upad_input()` (line 413) in `src/transformers/modeling_flash_attention_utils.py` were incorrectly describing `attention_m\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44318",
-    "created_at": "2026-02-27T06:40:02Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44374",
+    "created_at": "2026-03-01T17:07:35Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44318/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44318",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44374/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44374",
+    "labels": [],
     "merged": false,
-    "number": 44318,
+    "number": 44374,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Validate bounding box shape in LayoutLMv3Tokenizer",
-    "updated_at": "2026-02-27T14:43:08Z"
+    "title": "Fix incorrect position_ids docstring in modeling_flash_attention_utils.py",
+    "updated_at": "2026-03-02T14:06:30Z"
   },
   {
-    "additions": 4,
-    "author": "sxu75374",
+    "additions": 5,
+    "author": "N3u0ns",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Passes `config.rms_norm_eps` explicitly to `q_a_layernorm` and `kv_a_layernorm` in both DeepSeek V2 and V3 MLA attention. Currently these two norms are constructed without `eps`, falling back to the `RMSNorm` class\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Summary Fixes #44355 `inspect.getsource()` fails with `TypeError` when called on Cython-compiled functions or built-in functions that don't have Python source code. This adds a try/except block to gracefully handle this case by returnin\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44317",
-    "created_at": "2026-02-27T04:48:08Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44372",
+    "created_at": "2026-03-01T13:53:58Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44317/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44317",
+    "files_url": "https://github.com/huggingface/transformers/pull/44372/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44372",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44317,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(deepseek): pass rms_norm_eps to MLA q/kv layernorms",
-    "updated_at": "2026-02-27T14:30:04Z"
-  },
-  {
-    "additions": 2,
-    "author": "jashshah999",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44303 The weight loading progress bar called `pbar.refresh()` on every single parameter, bypassing tqdm's built-in rate-limiting. When output is redirected to a log file (e.g. in CI), this produced one line per parameter -- hundreds\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44316",
-    "created_at": "2026-02-27T03:08:28Z",
-    "deletions": 3,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44316/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44316",
-    "labels": [],
-    "merged": false,
-    "number": 44316,
+    "number": 44372,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Reduce tqdm verbosity during weight loading",
-    "updated_at": "2026-03-03T17:02:34Z"
+    "state": "closed",
+    "title": "fix: handle Cython-compiled functions in get_docstring_indentation_level",
+    "updated_at": "2026-03-02T13:39:50Z"
   },
   {
-    "additions": 3484,
-    "author": "jp1924",
+    "additions": 12,
+    "author": "leaderofARS",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Hello, Transformers team! I submitted a PR to add naver-hyperclovax/HyperCLOVAX-SEED-Think-32B (hereafter HCX), developed by the Korean IT company Naver while executing the government's national AI model project. Th\u2026",
-    "changed_files": 24,
+    "body_excerpt": "# Fix documentation inconsistencies in integrations folder ## Description This PR addresses documentation errors and inconsistencies across the integrations module, specifically clarifying terminology and deprecation status in two key inte\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 22,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44314",
-    "created_at": "2026-02-27T02:01:28Z",
-    "deletions": 18,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44369",
+    "created_at": "2026-03-01T07:34:43Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44314/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44314",
+    "files_url": "https://github.com/huggingface/transformers/pull/44369/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44369",
     "labels": [],
     "merged": false,
-    "number": 44314,
-    "review_comments_count": 77,
+    "number": 44369,
+    "review_comments_count": 7,
     "state": "open",
-    "title": "add HyperClovaX Vision",
-    "updated_at": "2026-04-13T02:23:53Z"
+    "title": "Feature/integrations docs fix",
+    "updated_at": "2026-03-06T19:47:39Z"
   },
   {
-    "additions": 4,
-    "author": "jashshah999",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes the same `TypeError: AddedToken() got multiple values for keyword argument 'special'` that #44281 addressed, but for the `extra_special_tokens` code path which was missed. #44281 (commit 8e663c7) correctly added `value.pop(\"special\",\u2026",
-    "changed_files": 1,
+    "additions": 171,
+    "author": "jayakumarpujar",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #43701 Models with `_checkpoint_conversion_mapping` (e.g. VLMs like Qwen2.5VL, LLaVA, ColPali, etc.) use a key renaming system: - **Loading** (`from_pretrained`): Checkpoint keys are renamed from original format \u2192 model fo\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44313",
-    "created_at": "2026-02-27T01:37:45Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44366",
+    "created_at": "2026-03-01T03:43:16Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44313/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44313",
+    "files_url": "https://github.com/huggingface/transformers/pull/44366/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44366",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44313,
+    "number": 44366,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AddedToken duplicate 'special' kwarg for extra_special_tokens",
-    "updated_at": "2026-02-27T14:26:28Z"
+    "title": "Fix resume_from_checkpoint key mismatch for models with _checkpoint_conversion_mapping",
+    "updated_at": "2026-03-02T14:02:22Z"
   },
   {
     "additions": 8,
-    "author": "haosenwang1018",
+    "author": "jayakumarpujar",
     "author_association": "NONE",
-    "body_excerpt": "Replace bare `except:` clauses with `except Exception:` for PEP 8 compliance.",
-    "changed_files": 4,
+    "body_excerpt": "## Summary - Fixes #44360 - The reference DeepSeek-V3.2 `fp8_index` kernel applies **ReLU** to per-head q\u00b7k scores before weighting and summing across heads: ``` logits[i3_n, i_h] = T.max(logits[i3_n, i_h], 0) * q_s_frag[i_h] ``` [Referenc\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44360-6",
+    "cluster_ids": [
+      "cluster-44360-6"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44364",
+    "created_at": "2026-03-01T02:19:14Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44364/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44364",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44364,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add missing ReLU in GlmMoeDsaIndexer scoring",
+    "updated_at": "2026-03-02T13:55:48Z"
+  },
+  {
+    "additions": 57,
+    "author": "jayakumarpujar",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Fixes #44327 - `decode_spans()` in the QA pipeline crashes with `ValueError: kth(=N) out of bounds (N)` when `len(scores_flat) == topk` (e.g., `top_k=100` with `seq_len=10`, since `10\u00b2 = 100`) - Root cause: `np.argpartition(ar\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44312",
-    "created_at": "2026-02-27T01:00:33Z",
-    "deletions": 8,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44363",
+    "created_at": "2026-03-01T01:47:44Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44312/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44312",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44363/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44363",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44312,
+    "number": 44363,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: replace 8 bare except clauses with except Exception",
-    "updated_at": "2026-02-27T03:27:27Z"
+    "title": "Fix off-by-one in decode_spans causing ValueError with np.argpartition",
+    "updated_at": "2026-03-02T13:03:02Z"
   },
   {
-    "additions": 38,
-    "author": "onel",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds species bias documentation across the transformers repository to help model authors and users recognize and address potential biases in language models. The updates include guidance on documenting bias categori\u2026",
-    "changed_files": 5,
+    "additions": 4,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing Dia use case was identified and fixed in this PR: \u2192 [MIGRATION_GUIDE_V5.md](https://github.com/harshaljanjani/transformers/blob/main/MIGRATION_GUIDE_V5.md) states that v5 renamed `additional_\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44311",
-    "created_at": "2026-02-27T00:02:49Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44362",
+    "created_at": "2026-02-28T20:04:05Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44311/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44311",
+    "files_url": "https://github.com/huggingface/transformers/pull/44362/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44362",
     "labels": [],
-    "merged": false,
-    "number": 44311,
+    "merged": true,
+    "number": 44362,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add species bias documentation to model cards and docs",
-    "updated_at": "2026-02-27T14:09:20Z"
+    "title": "fix(tokenizer): Fix MLukeTokenizer AttributeError post-v5 refactor",
+    "updated_at": "2026-03-02T14:51:18Z"
   },
   {
-    "additions": 63,
-    "author": "onel",
+    "additions": 341,
+    "author": "sxu75374",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds documentation for Pruna AI integration to the Transformers ecosystem, following the existing pattern used by vLLM and Unsloth integration docs. ## Changes - Created `docs/source/en/community_integrations/pruna.\u2026",
+    "body_excerpt": "# What does this PR do? Adds a workaround for the PyTorch MPS `sdpa_vector_2pass_mps` correctness bug ([pytorch/pytorch#174861](https://github.com/pytorch/pytorch/issues/174861)). **The problem:** On Apple Silicon with MPS backend, `F.scal\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44310",
-    "created_at": "2026-02-27T00:00:31Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44359",
+    "created_at": "2026-02-28T17:47:01Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44310/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44310",
+    "files_url": "https://github.com/huggingface/transformers/pull/44359/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44359",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44310,
+    "number": 44359,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: Add Pruna AI integration documentation",
-    "updated_at": "2026-02-27T14:08:21Z"
+    "title": "fix(sdpa): add workaround for MPS sdpa_vector_2pass_mps correctness bug",
+    "updated_at": "2026-03-02T13:54:58Z"
   },
   {
-    "additions": 129,
-    "author": "onel",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds SkyPilot deployment documentation to the DeepSpeed guide. The new section includes: - Introduction to SkyPilot as a unified framework for running AI workloads across clouds and Kubernetes - Complete example YAM\u2026",
+    "additions": 6,
+    "author": "hardikmeisheri",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "## Summary - `ShieldGemma2ForImageClassification` was missing `_tied_weights_keys`, so `model.lm_head.weight` was randomly re-initialized on every `from_pretrained` call instead of being tied to `embed_tokens.weight`. - This caused non-det\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44309",
-    "created_at": "2026-02-26T22:44:41Z",
-    "deletions": 0,
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44358",
+    "created_at": "2026-02-28T16:49:27Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44309/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44309",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44309,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44358/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44358",
+    "labels": [],
+    "merged": true,
+    "number": 44358,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add SkyPilot deployment documentation to DeepSpeed guide",
-    "updated_at": "2026-02-27T14:09:55Z"
+    "title": "Fix ShieldGemma2 non-reproducible outputs by adding _tied_weights_keys",
+    "updated_at": "2026-03-16T20:02:09Z"
   },
   {
-    "additions": 5854,
-    "author": "NielsRogge",
-    "author_association": "MEMBER",
+    "additions": 482,
+    "author": "NabilMch",
+    "author_association": "NONE",
     "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 15,
+    "changed_files": 127,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44308",
-    "created_at": "2026-02-26T21:26:05Z",
-    "deletions": 9,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44357",
+    "created_at": "2026-02-28T15:11:37Z",
+    "deletions": 489,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44308/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44308",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44357/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44357",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44308,
+    "number": 44357,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Codex/add sam3 litetext model to transformers fuvllg",
-    "updated_at": "2026-02-26T21:35:44Z"
+    "title": "Fix RoPE inv_freq default initialization (Issue #39753)",
+    "updated_at": "2026-03-02T13:50:00Z"
   },
   {
-    "additions": 7,
-    "author": "imstevenpmwork",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes a `TypeError: not all arguments converted during string formatting` caused by incorrectly passing `FutureWarning` as a second argument to `logger.warning_once()` in this file, introduced in https://git\u2026",
+    "additions": 6,
+    "author": "iamaber",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Checks if model is already in target dtype before casting to avoid redundant copies that cause 25% performance degradation with `--fp16_full_eval`. ## Changes - Added dtype check before casting model to fp16/bf16 in `evaluation_\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44307",
-    "created_at": "2026-02-26T20:09:20Z",
-    "deletions": 10,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44356",
+    "created_at": "2026-02-28T14:24:32Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44307/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44307",
-    "labels": [],
-    "merged": true,
-    "number": 44307,
-    "review_comments_count": 2,
+    "files_url": "https://github.com/huggingface/transformers/pull/44356/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44356",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44356,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(modeling_attn_mask_utils): remove FutureWarning from logger.warning_once()",
-    "updated_at": "2026-02-26T21:29:01Z"
+    "title": "fix: avoid redundant fp16/bf16 model casts in evaluation_loop",
+    "updated_at": "2026-03-02T13:59:38Z"
   },
   {
-    "additions": 10,
-    "author": "jashshah999",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a crash in `PretrainedConfig.update_from_string()` when the input string contains entries without `=` or with multiple `=` signs. **`configuration_utils.py`** - The existing code `dict(x.split(\"=\") for x in up\u2026",
+    "additions": 73,
+    "author": "sxu75374",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes an off-by-one error in `decode_spans()` where `np.argpartition` is called with `kth == len(arr)` when `topk` equals the number of candidate scores. This raises `ValueError: kth(=N) out of bounds (N)`. **Root\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44306",
-    "created_at": "2026-02-26T20:02:10Z",
-    "deletions": 2,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44354",
+    "created_at": "2026-02-28T08:46:39Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44306/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44306",
+    "files_url": "https://github.com/huggingface/transformers/pull/44354/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44354",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44306,
+    "number": 44354,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix ValueError crash in PretrainedConfig.update_from_string on malformed input",
-    "updated_at": "2026-02-27T14:25:03Z"
+    "title": "fix: off-by-one in decode_spans causes ValueError when topk == len(scores)",
+    "updated_at": "2026-03-02T13:02:38Z"
   },
   {
-    "additions": 7,
-    "author": "jashshah999",
+    "additions": 50,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a mutable default argument and two resource leaks: 1. **`integrations/tpu.py`** - `patched_optimizer_step` used `optimizer_args={}` as a default parameter. Mutable defaults are shared across calls, so any muta\u2026",
-    "changed_files": 3,
+    "body_excerpt": "@IlyasMoutawwakil , pls help review, thx!",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44305",
-    "created_at": "2026-02-26T19:22:33Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44353",
+    "created_at": "2026-02-28T07:50:37Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44305/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44305",
+    "files_url": "https://github.com/huggingface/transformers/pull/44353/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44353",
+    "labels": [],
+    "merged": true,
+    "number": 44353,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "add expectations for xpu for olmo_hybrid model",
+    "updated_at": "2026-04-02T03:22:06Z"
+  },
+  {
+    "additions": 13,
+    "author": "giulio-leone",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44336 The `loading_report` module emitted **bold/italic ANSI escape codes** even when `stdout` was not connected to a terminal (e.g. piped or redirected output). While `_color()` already gated color codes behind `sys.stdo\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44336-7",
+    "cluster_ids": [
+      "cluster-44336-7"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44352",
+    "created_at": "2026-02-28T06:22:19Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44352/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44352",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44305,
+    "number": 44352,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix mutable default in TPU optimizer and unclosed file handles",
-    "updated_at": "2026-02-27T14:21:58Z"
+    "title": "fix: suppress ANSI escape codes when stdout is not a terminal",
+    "updated_at": "2026-03-02T13:59:15Z"
   },
   {
-    "additions": 151,
-    "author": "adil-a",
+    "additions": 7,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds NeMo Automodel under the community integrations tab. ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [contribut\u2026",
-    "changed_files": 3,
+    "body_excerpt": null,
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44304",
-    "created_at": "2026-02-26T17:57:16Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44350",
+    "created_at": "2026-02-28T03:20:47Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44304/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44304",
+    "files_url": "https://github.com/huggingface/transformers/pull/44350/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44350",
     "labels": [],
     "merged": true,
-    "number": 44304,
-    "review_comments_count": 9,
+    "number": 44350,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "docs: Add NeMo Automodel community integration docs",
-    "updated_at": "2026-03-03T16:51:48Z"
+    "title": "skip 1 invalid test case for higgs_audio_v2",
+    "updated_at": "2026-04-09T02:32:42Z"
   },
   {
-    "additions": 28,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Integrate the neuron device to TrainingArguments. It enables using the neuron device with the `Trainer` class.",
+    "additions": 49,
+    "author": "zzc0430",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? > Inspired by https://github.com/huggingface/transformers/pull/44347#issuecomment-3976028358 Fixes `transformers serve` failing with hybrid models like Qwen3.5 that use `linear_attention` layers. Two issues are addr\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44302",
-    "created_at": "2026-02-26T15:11:09Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44349",
+    "created_at": "2026-02-28T03:09:30Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44302/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44302",
+    "files_url": "https://github.com/huggingface/transformers/pull/44349/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44349",
     "labels": [],
-    "merged": true,
-    "number": 44302,
+    "merged": false,
+    "number": 44349,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Integrate the Neuron device to TrainingArguments",
-    "updated_at": "2026-03-05T15:11:00Z"
+    "title": "fix: support linear_attention in continuous batching and fix serve ch\u2026",
+    "updated_at": "2026-03-02T13:48:04Z"
   },
   {
-    "additions": 30,
-    "author": "likejazz",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? When fine-tuning Qwen3 with frameworks like TRL, `<think>` blocks are silently omitted from the token sequence, causing chain-of-thought reasoning data to be completely lost during training with no error or warning\u2026",
-    "changed_files": 1,
+    "additions": 341,
+    "author": "n0kovo",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Most quantized models for Apple Silicon on the Hub are in MLX format. The `MetalConfig` quantization backend supports on-the-fly quantization of standard checkpoints but cannot load pre-quantized MLX models. This PR fixes the fi\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44301",
-    "created_at": "2026-02-26T14:30:24Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44348",
+    "created_at": "2026-02-28T00:24:32Z",
+    "deletions": 32,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44301/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44301",
+    "files_url": "https://github.com/huggingface/transformers/pull/44348/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44348",
     "labels": [],
     "merged": false,
-    "number": 44301,
+    "number": 44348,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix: Qwen3 `<think>` blocks not written during fine-tuning (TRL)",
-    "updated_at": "2026-03-02T17:18:03Z"
-  },
-  {
-    "additions": 2539,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The recursive feature is needed for me in https://github.com/huggingface/transformers/pull/44252 to allow timm backbone define its conversion only once. Also it currently allows to delete \"t5gemma2\" from conversion,\u2026",
-    "changed_files": 18,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 44,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44300",
-    "created_at": "2026-02-26T14:09:59Z",
-    "deletions": 470,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44300/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44300",
-    "labels": [],
-    "merged": true,
-    "number": 44300,
-    "review_comments_count": 45,
-    "state": "closed",
-    "title": "Dynamic weight conversion is recursive",
-    "updated_at": "2026-03-26T11:59:06Z"
+    "state": "open",
+    "title": "Enable MetalConfig to load pre-quantized MLX models from HuggingFace Hub",
+    "updated_at": "2026-03-02T17:18:46Z"
   },
   {
-    "additions": 520,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, to allow for proper alignment with vllm/sglang Closes #44258",
-    "changed_files": 21,
+    "additions": 49,
+    "author": "sxu75374",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes `AttributeError` when using continuous batching with composite model configs (e.g. `Qwen3_5Config` for vision-language models). Composite configs store attributes like `num_attention_heads` and `num_key_value\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44299",
-    "created_at": "2026-02-26T12:50:03Z",
-    "deletions": 282,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44347",
+    "created_at": "2026-02-27T22:48:49Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44299/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44299",
-    "labels": [],
-    "merged": true,
-    "number": 44299,
-    "review_comments_count": 4,
+    "files_url": "https://github.com/huggingface/transformers/pull/44347/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44347",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44347,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": ":rotating_light: [`Ernie 4.5 VL Moe`] Fix up namings to vllm/sglang convention",
-    "updated_at": "2026-02-26T16:42:50Z"
+    "title": "fix: resolve composite config in PagedAttentionCache and group_layers_by_attn_type",
+    "updated_at": "2026-03-02T13:41:23Z"
   },
   {
-    "additions": 1145,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? A few issues we did not catch: - https://github.com/huggingface/transformers/blob/47b0e478f324b54f177ea7998a0791870fdd0324/src/transformers/convert_slow_tokenizer.py#L1314-L1315 missing from `GemmaTokenier` - SPM's\u2026",
-    "changed_files": 7,
+    "additions": 4,
+    "author": "sxu75374",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes the `q_a_layernorm` and `kv_a_layernorm` in DeepSeek V2/V3 MLA attention to explicitly receive `config.rms_norm_eps` instead of falling back to the RMSNorm class default (`1e-6`). **The problem:** All other RM\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44298",
-    "created_at": "2026-02-26T12:34:38Z",
-    "deletions": 29,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44298/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44298",
-    "labels": [],
-    "merged": false,
-    "number": 44298,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Auto detect wrong mapping models",
-    "updated_at": "2026-03-02T10:13:28Z"
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44346",
+    "created_at": "2026-02-27T21:47:45Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44346/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44346",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44346,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(deepseek): pass config.rms_norm_eps to MLA q/kv layernorms",
+    "updated_at": "2026-03-02T13:26:21Z"
   },
   {
-    "additions": 12302,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Needed for https://github.com/huggingface/transformers/pull/41250 to pass the docstring-checker in CI. Our basic checker doesn't handle well dataclasses so we can use `autodocstring`",
-    "changed_files": 512,
+    "additions": 13,
+    "author": "manavshrivastavagit",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44303 When redirecting `from_pretrained` output to a log file (e.g. in CI), the \"Loading weights\" tqdm bar was updating its postfix with `Materializing param=...` on every parameter, producing huge log files. ## Change -\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44296",
-    "created_at": "2026-02-26T10:29:21Z",
-    "deletions": 37860,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44345",
+    "created_at": "2026-02-27T21:05:22Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44296/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44296",
-    "labels": [],
-    "merged": true,
-    "number": 44296,
-    "review_comments_count": 11,
+    "files_url": "https://github.com/huggingface/transformers/pull/44345/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44345",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44345,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add auto-docstring on configs",
-    "updated_at": "2026-03-06T11:58:10Z"
+    "title": "Less verbose weight-loading tqdm when stdout is not a TTY (fixes #44303)",
+    "updated_at": "2026-03-02T13:49:11Z"
   },
   {
-    "additions": 1,
-    "author": "mario-sanz",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 6,
+    "author": "manavshrivastavagit",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44297 Qwen3.5 models on the Hub (e.g. [Qwen/Qwen3.5-27B](https://huggingface.co/Qwen/Qwen3.5-27B)) use `\"tokenizer_class\": \"Qwen2Tokenizer\"` in `tokenizer_config.json`, but `TOKENIZER_MAPPING_NAMES` had `qwen3_5` \u2192 `\"Qwen\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44344",
+    "created_at": "2026-02-27T21:04:27Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44344/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44344",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44344,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix tokenizer_class in tokenizer_config.json for Qwen3.5 save_pretrained (fixes #44297)",
+    "updated_at": "2026-03-02T13:17:41Z"
+  },
+  {
+    "additions": 16,
+    "author": "manavshrivastavagit",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44336 `utils/loading_report.py` was emitting ANSI codes for **bold** and *italic* via `PALETTE['bold']` and `PALETTE['italic']` without checking if stdout is connected to a terminal. `_color()` already respects `sys.stdou\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44336-7",
+    "cluster_ids": [
+      "cluster-44336-7"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44294",
-    "created_at": "2026-02-26T08:30:52Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44343",
+    "created_at": "2026-02-27T20:58:33Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44294/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44294",
-    "labels": [],
-    "merged": true,
-    "number": 44294,
+    "files_url": "https://github.com/huggingface/transformers/pull/44343/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44343",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44343,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: use `TokenizersBackend` for Olmo3 to preserve custom `pre_tokenizer`",
-    "updated_at": "2026-02-26T10:35:44Z"
+    "title": "Fix ANSI codes in loading_report when stdout is not a TTY (fixes #44336)",
+    "updated_at": "2026-03-02T13:44:43Z"
   },
   {
-    "additions": 13,
-    "author": "IlyasMoutawwakil",
+    "additions": 384,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 15,
+    "body_excerpt": "- created a new performance section divided into memory and speed optimizations - model memory training anatomy [guide](https://huggingface.co/docs/transformers/main/en/model_memory_anatomy) is now the more descriptive and simplified GPU m\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44293",
-    "created_at": "2026-02-26T08:25:23Z",
-    "deletions": 384,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44342",
+    "created_at": "2026-02-27T20:10:49Z",
+    "deletions": 274,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44293/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44293",
+    "files_url": "https://github.com/huggingface/transformers/pull/44342/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44342",
     "labels": [],
     "merged": true,
-    "number": 44293,
-    "review_comments_count": 7,
+    "number": 44342,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "Strict export cleanup",
-    "updated_at": "2026-03-02T09:36:19Z"
+    "title": "[docs] training performance",
+    "updated_at": "2026-04-09T20:43:32Z"
   },
   {
-    "additions": 548,
-    "author": "stevhliu",
+    "additions": 12,
+    "author": "Kokonico",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44336 ## Changes * Added a new `_palette` function to return the ANSI code for a given color or format only if `sys.stdout` is interactive. (`src/transformers/utils/loading_report.py`) * Updated all usages of `PALETTE[<format>]` in\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44336-7",
+    "cluster_ids": [
+      "cluster-44336-7"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44341",
+    "created_at": "2026-02-27T19:30:30Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44341/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44341",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44341,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix and optimize ANSI color handling in loading report for interactive terminals",
+    "updated_at": "2026-03-02T18:16:00Z"
+  },
+  {
+    "additions": 33,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "- adds `trainer_recipes.md` to show how to use other practical `Trainer` features outside of the basic training loop - updates hyperparam search docs - updates `optimizers.md` with how to customize it (prebuilt instances, passing a class +\u2026",
-    "changed_files": 6,
+    "body_excerpt": "Some speculative tests seem flaky with SDPA but reliable with `eager` attention. In local testing, `test_speculative_decoding_equals_regular_decoding` fails 5-10% of the time without this change. and I also saw CI failures. Failures are re\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44290",
-    "created_at": "2026-02-26T01:02:15Z",
-    "deletions": 210,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44340",
+    "created_at": "2026-02-27T18:09:09Z",
+    "deletions": 27,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44290/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44290",
+    "files_url": "https://github.com/huggingface/transformers/pull/44340/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44340",
     "labels": [],
     "merged": true,
-    "number": 44290,
-    "review_comments_count": 17,
+    "number": 44340,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "[docs] optimizers, hyperparam search, training features",
-    "updated_at": "2026-04-09T20:00:53Z"
+    "title": "Fix speculative tests that are flaky with SDPA",
+    "updated_at": "2026-03-02T17:18:27Z"
   },
   {
-    "additions": 8,
-    "author": "haosenwang1018",
-    "author_association": "NONE",
-    "body_excerpt": "Replace bare except clauses with except Exception.",
-    "changed_files": 4,
+    "additions": 6221,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? \u2192 This PR adds **DEIMv2** to Transformers! \u2192 **IMP:** I've linked two notebooks: a [Colab notebook here](https://colab.research.google.com/drive/1jCNefxrKiHWdBEIYTcU3jsd9xyWDwIxC?usp=sharing) demonstrating the fun\u2026",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44289",
-    "created_at": "2026-02-26T00:58:35Z",
-    "deletions": 8,
+    "comments_count": 20,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44339",
+    "created_at": "2026-02-27T18:08:53Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44289/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44289",
+    "files_url": "https://github.com/huggingface/transformers/pull/44339/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44339",
     "labels": [],
     "merged": false,
-    "number": 44289,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: replace 8 bare except clauses with except Exception",
-    "updated_at": "2026-02-26T12:52:48Z"
+    "number": 44339,
+    "review_comments_count": 199,
+    "state": "open",
+    "title": "model: Add DEIMv2 to Transformers",
+    "updated_at": "2026-04-14T08:37:47Z"
   },
   {
-    "additions": 1,
-    "author": "somAzzz",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? When loading the new Qwen 3.5 models (e.g., 'Qwen/Qwen3.5-35B-A3B') using the 'transformers' (5.3.0.dev0), the initialization crashes with a 'TypeError' . **Error Traceback Context:** (APIServer pid=98544) File \"...\u2026",
-    "changed_files": 1,
+    "additions": 3641,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR rework a bit how distributed tests are tested. I tried to keep some of the existing tests and added new tests also. For each of these distributed methods (ddp, fsdp, deepspeeed), we have some common tests li\u2026",
+    "changed_files": 38,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44288",
-    "created_at": "2026-02-25T22:43:41Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44338",
+    "created_at": "2026-02-27T17:50:16Z",
+    "deletions": 3762,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44288/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44288",
+    "files_url": "https://github.com/huggingface/transformers/pull/44338/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44338",
     "labels": [],
-    "merged": false,
-    "number": 44288,
-    "review_comments_count": 1,
+    "merged": true,
+    "number": 44338,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError in modeling_rope_utils.py when ignore_keys_at_rope_vali\u2026",
-    "updated_at": "2026-02-26T20:09:07Z"
+    "title": "Update distributed tests",
+    "updated_at": "2026-03-05T23:35:36Z"
   },
   {
-    "additions": 11,
-    "author": "jashshah999",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes mutable default arguments and unclosed file handles across several files. **Mutable defaults** (can cause shared state across calls): - `debug_utils.py`: `DebugUnderflowOverflow.__init__` `trace_batch_nums=[]`\u2026",
-    "changed_files": 4,
+    "additions": 2,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "- moves `kernels-community/flash-attn2:FlashAttention2` to `from_pretrained(attn_implementation...)` - fix error message for registering a kernel",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44287",
-    "created_at": "2026-02-25T22:23:20Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44337",
+    "created_at": "2026-02-27T17:36:54Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44287/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44287",
+    "files_url": "https://github.com/huggingface/transformers/pull/44337/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44337",
     "labels": [],
     "merged": true,
-    "number": 44287,
+    "number": 44337,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix mutable default arguments and resource leaks",
-    "updated_at": "2026-03-02T15:17:25Z"
+    "title": "[docs] kernelconfig fix",
+    "updated_at": "2026-02-27T22:46:30Z"
   },
   {
-    "additions": 31,
-    "author": "kathrynle20",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds on to the Fouroversix integration by adding support for quantized models such as the gpt-oss model by adding weight conversions and an additional config argument. Reference: https://github.com/huggingfa\u2026",
-    "changed_files": 3,
+    "additions": 57,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the RoFormer model to use the `@capture_outputs` and `@can_return_tuple` decorators, following the established pattern (similar to #44047 for Bloom, #44151 for BioGPT, etc.). ### Changes: - **`RoFormerMod\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44286",
-    "created_at": "2026-02-25T22:15:15Z",
-    "deletions": 14,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44335",
+    "created_at": "2026-02-27T17:23:01Z",
+    "deletions": 172,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44286/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44286",
+    "files_url": "https://github.com/huggingface/transformers/pull/44335/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44335",
     "labels": [],
-    "merged": true,
-    "number": 44286,
-    "review_comments_count": 12,
+    "merged": false,
+    "number": 44335,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add future model support for Fouroversix",
-    "updated_at": "2026-03-04T16:28:13Z"
+    "title": "Refactor RoFormer output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:29:59Z"
   },
   {
-    "additions": 3484,
+    "additions": 1,
     "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds the VidEoMT model, as described in [VidEoMT: Your ViT is Secretly Also a Video Segmentation Model](https://huggingface.co/papers/2602.17807). Gradio demo (running on ZeroGPU): https://huggingface.co/spa\u2026",
-    "changed_files": 17,
+    "body_excerpt": "# What does this PR do? I had some issues with running `transformers-cli add-new-model-like`. This PR fixes it. Fixes #44661.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 23,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44285",
-    "created_at": "2026-02-25T19:24:39Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44334",
+    "created_at": "2026-02-27T17:13:44Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44285/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44285",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44334/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44334",
+    "labels": [],
     "merged": true,
-    "number": 44285,
-    "review_comments_count": 57,
+    "number": 44334,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add VidEoMT",
-    "updated_at": "2026-03-25T17:05:47Z"
+    "title": "Fix CookieCutter",
+    "updated_at": "2026-03-13T17:13:28Z"
   },
   {
-    "additions": 388,
-    "author": "paipeline",
+    "additions": 13,
+    "author": "ManasVardhan",
     "author_association": "NONE",
-    "body_excerpt": "## Description Fixes #44242 where Mixtral models do not compute auxiliary load balancing loss when `output_router_logits=False`, even when `router_aux_loss_coef > 0`. ## Problem According to the [Mixtral documentation](https://huggingface.\u2026",
-    "changed_files": 4,
+    "body_excerpt": "## What does this PR do? Refactors the ALBERT model to use named attribute access instead of index-based access on model outputs, and removes redundant `return_dict=True` arguments from inner model calls (already handled by `@capture_outpu\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44284",
-    "created_at": "2026-02-25T18:38:15Z",
-    "deletions": 3,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44333",
+    "created_at": "2026-02-27T17:12:25Z",
+    "deletions": 18,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44284/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44284",
+    "files_url": "https://github.com/huggingface/transformers/pull/44333/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44333",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44284,
+    "number": 44333,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Mixtral auxiliary loss computation when output_router_logits=False",
-    "updated_at": "2026-02-26T12:41:46Z"
+    "title": "Refactor ALBERT to use named attributes and remove redundant return_dict=True",
+    "updated_at": "2026-03-02T13:05:54Z"
   },
   {
-    "additions": 1,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "Accidentally caused by #43325, wrong naming --> modular doesn't properly convert some files (e.g. kyutai) Also fixes red CI on main",
-    "changed_files": 1,
+    "additions": 3,
+    "author": "tysoncung",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fix minor typos found in comments and docstrings: - `orignal` \u2192 `original` in `src/transformers/integrations/peft.py` (lines 245, 284) - Duplicate word `is is` \u2192 `is` in `src/transformers/models/dia/processing_dia.py` (line 89) Small clean\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44283",
-    "created_at": "2026-02-25T18:33:17Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44332",
+    "created_at": "2026-02-27T16:11:46Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44283/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44283",
+    "files_url": "https://github.com/huggingface/transformers/pull/44332/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44332",
     "labels": [],
     "merged": true,
-    "number": 44283,
+    "number": 44332,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Modular`] Fix file type regression",
-    "updated_at": "2026-02-25T20:04:41Z"
+    "title": "Fix typos in comments and docstrings",
+    "updated_at": "2026-02-27T18:02:59Z"
   },
   {
-    "additions": 5,
-    "author": "Rocketknight1",
+    "additions": 33,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "Response schema save-loading was broken in #40936, this PR restores it! I did most of this in #42300 but missed an issue with loading/saving.",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? fixed the bfloat16 dtype mismatch and Loss computation shape mismatch. Also added tests for these. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to ap\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44282",
-    "created_at": "2026-02-25T17:57:54Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44331",
+    "created_at": "2026-02-27T15:46:08Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44282/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44282",
+    "files_url": "https://github.com/huggingface/transformers/pull/44331/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44331",
     "labels": [],
     "merged": true,
-    "number": 44282,
-    "review_comments_count": 0,
+    "number": 44331,
+    "review_comments_count": 9,
     "state": "closed",
-    "title": "Restore response_schema saving-loading",
-    "updated_at": "2026-02-25T18:27:22Z"
+    "title": "[timesfm2_5] fix timesfm2.5 loss",
+    "updated_at": "2026-03-03T17:22:56Z"
   },
   {
-    "additions": 1,
-    "author": "ArthurZucker",
+    "additions": 289,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Its a very small fix for #44062",
+    "body_excerpt": "# What does this PR do? As per the title! Follow-up of https://github.com/huggingface/transformers/pull/44181 with more models!",
+    "changed_files": 136,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44330",
+    "created_at": "2026-02-27T15:33:02Z",
+    "deletions": 1682,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44330/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44330",
+    "labels": [],
+    "merged": true,
+    "number": 44330,
+    "review_comments_count": 13,
+    "state": "closed",
+    "title": "Remove `cache_position` in more models",
+    "updated_at": "2026-03-11T14:47:50Z"
+  },
+  {
+    "additions": 3,
+    "author": "linfeng-du",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44281",
-    "created_at": "2026-02-25T16:28:37Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44329",
+    "created_at": "2026-02-27T15:27:39Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44329/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44329",
+    "labels": [],
+    "merged": true,
+    "number": 44329,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Enable Liger Kernel when doing hyperparameter search.",
+    "updated_at": "2026-03-03T13:44:56Z"
+  },
+  {
+    "additions": 92,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds guidelines for agents when it comes to add/run trainer tests. This needs to be updated as we modify, refactor the code !",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44328",
+    "created_at": "2026-02-27T15:17:24Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44281/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44281",
+    "files_url": "https://github.com/huggingface/transformers/pull/44328/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44328",
     "labels": [],
     "merged": true,
-    "number": 44281,
-    "review_comments_count": 0,
+    "number": 44328,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix special token maps BC",
-    "updated_at": "2026-02-26T10:34:17Z"
+    "title": "Add testing guide for agents for trainer tests",
+    "updated_at": "2026-02-27T17:32:11Z"
   },
   {
-    "additions": 614,
-    "author": "RishabhMehra",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? - Adds an opt-in use_fast_grouping flag to TokenClassificationPipeline to enable a NumPy-vectorised BIO grouping path (~5\u00d7 faster on long sequences) while keeping the legacy path as default. - Improves correctness:\u2026",
-    "changed_files": 3,
+    "additions": 38,
+    "author": "overcastbulb",
+    "author_association": "NONE",
+    "body_excerpt": "Adds missing pipeline tutorial example for zero-shot-classification following the existing format of other task examples in pipeline_tutorial.md. Related: #18926",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44278",
-    "created_at": "2026-02-25T12:49:56Z",
-    "deletions": 63,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44326",
+    "created_at": "2026-02-27T14:37:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44278/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44278",
+    "files_url": "https://github.com/huggingface/transformers/pull/44326/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44326",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44278,
+    "number": 44326,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[FEAT] Pipelines - Faster group_entities",
-    "updated_at": "2026-02-25T13:54:58Z"
+    "state": "closed",
+    "title": "docs: Add zero-shot-classification example to pipeline tutorial",
+    "updated_at": "2026-02-27T14:46:24Z"
   },
   {
-    "additions": 105,
-    "author": "tarekziade",
+    "additions": 4,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This patch makes the GLM-ASR doc example runnable by using `runnables` - see https://github.com/huggingface/doc-builder/blob/main/docs/runnable-code-blocks.md",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? The `CLIPMLP` has the bias set to True but timesFM 2.5 uses `bias=False` in the pretrained model <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appe\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 36,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44277",
-    "created_at": "2026-02-25T08:49:20Z",
-    "deletions": 19,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44325",
+    "created_at": "2026-02-27T13:18:40Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44277/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44277",
+    "files_url": "https://github.com/huggingface/transformers/pull/44325/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44325",
     "labels": [],
     "merged": true,
-    "number": 44277,
-    "review_comments_count": 6,
+    "number": 44325,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Use doc-builder runnable example for GLM-ASR",
-    "updated_at": "2026-04-02T16:16:55Z"
+    "title": "[timesfm2_5] fix timesfm mlp bias",
+    "updated_at": "2026-02-27T13:36:13Z"
   },
   {
-    "additions": 0,
-    "author": "vishalpatil-45",
+    "additions": 16,
+    "author": "tonglei19961121",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? This PR addresses the performance regression where `import transformers` takes ~3.5s. The issue was caused by eager imports of heavy backend libraries (like torch/numpy) during the initial module load. By moving the\u2026",
-    "changed_files": 0,
+    "body_excerpt": "Add tutorial example for DocumentQuestionAnswering pipeline following the existing format of other task examples. This PR addresses the first TODO item in #18926. Changes: - Added document-question-answering task example to pipeline_tutori\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44275",
-    "created_at": "2026-02-25T08:27:32Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44324",
+    "created_at": "2026-02-27T12:29:06Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44275/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44275",
+    "files_url": "https://github.com/huggingface/transformers/pull/44324/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44324",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44275,
+    "number": 44324,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Fix] Restore lazy loading to improve import performance (#44273)",
-    "updated_at": "2026-02-25T20:37:18Z"
+    "title": "docs: Add document-question-answering example to pipeline tutorial",
+    "updated_at": "2026-02-27T14:35:23Z"
   },
   {
-    "additions": 559,
-    "author": "paipeline",
+    "additions": 16,
+    "author": "tonglei19961121",
     "author_association": "NONE",
-    "body_excerpt": "## Description Fixes #44242 This PR resolves an issue where the auxiliary load balancing loss was not computed when `output_router_logits=False`, even when `router_aux_loss_coef != 0`. ## Problem The auxiliary loss computation was incorrec\u2026",
-    "changed_files": 6,
+    "body_excerpt": "Add tutorial example for DocumentQuestionAnswering pipeline following the existing format of other task examples. Fixes #18926",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44274",
-    "created_at": "2026-02-25T06:38:02Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44323",
+    "created_at": "2026-02-27T12:26:00Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44274/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44274",
+    "files_url": "https://github.com/huggingface/transformers/pull/44323/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44323",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44274,
+    "number": 44323,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix auxiliary load balancing loss computation when output_router_logits=False",
-    "updated_at": "2026-02-25T13:36:03Z"
+    "title": "docs: Add document-question-answering example to pipeline tutorial",
+    "updated_at": "2026-02-27T14:38:43Z"
   },
   {
-    "additions": 1,
-    "author": "hangjun-ezra",
+    "additions": 12,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a `TypeError: unsupported operand type(s) for |: 'list' and 'set'` in `RotaryEmbeddingConfigMixin.convert_rope_params_to_dict` when `ignore_keys_at_rope_validation` is a `list` instead of a `set`. ### Root ca\u2026",
+    "body_excerpt": "@vasqu This PR skipped 2 invalid test cases: ``` tests/models/voxtral_realtime/test_modeling_voxtral_realtime.py::VoxtralRealtimeForConditionalGenerationModelTest::test_generate_with_quant_cache tests/models/voxtral_realtime/test_modeling_\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44272",
-    "created_at": "2026-02-25T03:52:04Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44321",
+    "created_at": "2026-02-27T09:54:14Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44272/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44272",
+    "files_url": "https://github.com/huggingface/transformers/pull/44321/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44321",
     "labels": [],
     "merged": true,
-    "number": 44272,
+    "number": 44321,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError in convert_rope_params_to_dict when ignore_keys is a list",
-    "updated_at": "2026-02-25T14:38:36Z"
+    "title": "skip 2 invalid test cases for voxtral_realtime model",
+    "updated_at": "2026-04-02T03:22:04Z"
   },
   {
-    "additions": 1272,
-    "author": "balak4",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - Add GreedyLR, a metric-based adaptive learning rate scheduler that adjusts the learning rate during training based on the current loss - Based on [\"Dynamic Learning Rate Scheduling based on Loss Changes Leads to Faster Converg\u2026",
-    "changed_files": 10,
+    "additions": 5231,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds [SAM3-LiteText: An Anatomical Study of the SAM3 Text Encoder for Efficient Vision-Language Segmentation](https://huggingface.co/papers/2602.12173). Fixes #44205",
+    "changed_files": 22,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44271",
-    "created_at": "2026-02-25T01:40:57Z",
-    "deletions": 7,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44320",
+    "created_at": "2026-02-27T08:29:00Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44271/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44271",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44320/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44320",
+    "labels": [
+      "New model"
+    ],
     "merged": true,
-    "number": 44271,
-    "review_comments_count": 3,
+    "number": 44320,
+    "review_comments_count": 63,
     "state": "closed",
-    "title": "Add GreedyLR adaptive learning rate scheduler",
-    "updated_at": "2026-03-18T18:45:46Z"
-  },
-  {
-    "additions": 88,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? A lot of ProcessorsKwargs have incorrect/unspecified type hints in their ProcessorsKwargs TypedDict for their images_kwargs attribute. Functionnaly, this did not cause issues as \"_merge_kwargs\" automatically picks u\u2026",
-    "changed_files": 44,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44270",
-    "created_at": "2026-02-25T00:11:31Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44270/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44270",
-    "labels": [],
-    "merged": false,
-    "number": 44270,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Add correct typing to custom images_kwargs in ProcessorsKwargs",
-    "updated_at": "2026-02-25T01:12:06Z"
+    "title": "Add SAM3-LiteText",
+    "updated_at": "2026-04-13T18:37:07Z"
   },
   {
-    "additions": 30,
-    "author": "yonigozlan",
+    "additions": 74,
+    "author": "IlyasMoutawwakil",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This is a follow-up to https://github.com/huggingface/transformers/pull/43748, and will allow to have clickable links to the full modality kwargs when present in the docstring of a processor or image processor Cc @s\u2026",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44269",
-    "created_at": "2026-02-25T00:05:47Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44319",
+    "created_at": "2026-02-27T08:20:45Z",
+    "deletions": 56,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44269/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44269",
+    "files_url": "https://github.com/huggingface/transformers/pull/44319/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44319",
     "labels": [],
     "merged": true,
-    "number": 44269,
-    "review_comments_count": 0,
+    "number": 44319,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Add `ProcessingKwargs` `ImagesKwargs` etc. to docs",
-    "updated_at": "2026-02-27T19:03:15Z"
+    "title": "Support non-gated experts",
+    "updated_at": "2026-03-02T19:26:38Z"
   },
   {
-    "additions": 5,
-    "author": "ethanknights",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Some improvements to the `trainer.py` docs. ## Before submitting - [x] This PR fixes a typo or improves the docs. ## Who can review? Documentation: @stevhliu",
+    "additions": 10,
+    "author": "yoginlangalia",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Adds input validation for bounding box shape in `LayoutLMv3Tokenizer`. When users pass boxes with fewer (or more) than 4 values per box, the tokenizer now raises a clear `ValueError` instead of a confusing generic\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44268",
-    "created_at": "2026-02-24T23:20:16Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44318",
+    "created_at": "2026-02-27T06:40:02Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44268/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44268",
-    "labels": [],
-    "merged": true,
-    "number": 44268,
+    "files_url": "https://github.com/huggingface/transformers/pull/44318/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44318",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44318,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: fixes in `Trainer` class docs (`compute_loss` & `hyperparameter_search`)",
-    "updated_at": "2026-02-26T00:50:23Z"
+    "title": "Validate bounding box shape in LayoutLMv3Tokenizer",
+    "updated_at": "2026-02-27T14:43:08Z"
   },
   {
     "additions": 4,
-    "author": "manavshrivastavagit",
+    "author": "sxu75374",
     "author_association": "NONE",
-    "body_excerpt": "## Summary - Update the `DocumentQuestionAnsweringPipeline` docstring to explicitly mention the task summary in the Transformers documentation. - Remove the stale TODO comment now that document question answering is covered in the task sum\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Passes `config.rms_norm_eps` explicitly to `q_a_layernorm` and `kv_a_layernorm` in both DeepSeek V2 and V3 MLA attention. Currently these two norms are constructed without `eps`, falling back to the `RMSNorm` class\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44267",
-    "created_at": "2026-02-24T20:35:18Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44317",
+    "created_at": "2026-02-27T04:48:08Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44267/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44267",
+    "files_url": "https://github.com/huggingface/transformers/pull/44317/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44317",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44267,
+    "number": 44317,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Docs: point DocumentQuestionAnswering pipeline to task summary",
-    "updated_at": "2026-02-25T13:34:48Z"
+    "title": "fix(deepseek): pass rms_norm_eps to MLA q/kv layernorms",
+    "updated_at": "2026-02-27T14:30:04Z"
   },
   {
-    "additions": 27,
-    "author": "harshaljanjani",
+    "additions": 2,
+    "author": "jashshah999",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 **Reasoning:** The impact of this fix goes beyond `Mask2Former` and `DeformableDetr` and should fix any model that uses `torch_compilable_check`. Most use\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Fixes #44303 The weight loading progress bar called `pbar.refresh()` on every single parameter, bypassing tqdm's built-in rate-limiting. When output is redirected to a log file (e.g. in CI), this produced one line per parameter -- hundreds\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44266",
-    "created_at": "2026-02-24T20:02:06Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44316",
+    "created_at": "2026-02-27T03:08:28Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44266/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44266",
+    "files_url": "https://github.com/huggingface/transformers/pull/44316/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44316",
     "labels": [],
-    "merged": true,
-    "number": 44266,
+    "merged": false,
+    "number": 44316,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(utils): Make torch_compilable_check compatible with torch.export strict mode",
-    "updated_at": "2026-02-26T09:42:47Z"
+    "title": "Reduce tqdm verbosity during weight loading",
+    "updated_at": "2026-03-03T17:02:34Z"
   },
   {
-    "additions": 90,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, WIP --> needs a test",
-    "changed_files": 36,
+    "additions": 3484,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Hello, Transformers team! I submitted a PR to add naver-hyperclovax/HyperCLOVAX-SEED-Think-32B (hereafter HCX), developed by the Korean IT company Naver while executing the government's national AI model project. Th\u2026",
+    "changed_files": 24,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44264",
-    "created_at": "2026-02-24T18:06:58Z",
-    "deletions": 210,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44264/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44264",
+    "comments_count": 22,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44314",
+    "created_at": "2026-02-27T02:01:28Z",
+    "deletions": 18,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44314/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44314",
     "labels": [],
     "merged": false,
-    "number": 44264,
-    "review_comments_count": 3,
+    "number": 44314,
+    "review_comments_count": 77,
     "state": "open",
-    "title": "[`Moe`] Enable aux loss automatically when in training + coef is not 0",
-    "updated_at": "2026-02-25T18:53:20Z"
+    "title": "add HyperClovaX Vision",
+    "updated_at": "2026-04-13T02:23:53Z"
   },
   {
-    "additions": 5882,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR refactor the common tests that we have in Trainer. I've mainly did the following: - Split the tests that we have in `test_trainer.py` into multiple files. - Fix common tests that were failing in the CI",
-    "changed_files": 18,
+    "additions": 4,
+    "author": "jashshah999",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes the same `TypeError: AddedToken() got multiple values for keyword argument 'special'` that #44281 addressed, but for the `extra_special_tokens` code path which was missed. #44281 (commit 8e663c7) correctly added `value.pop(\"special\",\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44260",
-    "created_at": "2026-02-24T15:51:11Z",
-    "deletions": 6147,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44313",
+    "created_at": "2026-02-27T01:37:45Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44260/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44260",
-    "labels": [],
-    "merged": true,
-    "number": 44260,
-    "review_comments_count": 3,
+    "files_url": "https://github.com/huggingface/transformers/pull/44313/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44313",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44313,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Update common tests Trainer",
-    "updated_at": "2026-02-27T17:31:59Z"
+    "title": "Fix AddedToken duplicate 'special' kwarg for extra_special_tokens",
+    "updated_at": "2026-02-27T14:26:28Z"
   },
   {
-    "additions": 1830,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? This PR supersedes #43985 to replace the dataset/sampler/dataloader with a data producer that should allow us to more easily get to the next step of async training for RL. <!-- Congratulations! You've made it this f\u2026",
-    "changed_files": 6,
+    "additions": 8,
+    "author": "haosenwang1018",
+    "author_association": "NONE",
+    "body_excerpt": "Replace bare `except:` clauses with `except Exception:` for PEP 8 compliance.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44259",
-    "created_at": "2026-02-24T15:01:56Z",
-    "deletions": 59,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44259/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44259",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44312",
+    "created_at": "2026-02-27T01:00:33Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44312/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44312",
     "labels": [],
     "merged": false,
-    "number": 44259,
+    "number": 44312,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Async data producer",
-    "updated_at": "2026-02-26T19:57:43Z"
+    "state": "closed",
+    "title": "fix: replace 8 bare except clauses with except Exception",
+    "updated_at": "2026-02-27T03:27:27Z"
   },
   {
-    "additions": 8,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "The old one has a merge conflict and it was easier to just mirror into a new branch / PR Note that this only affects the local big tests that I used to run on the A100s locally; not to be run with the CI (too big)",
-    "changed_files": 2,
+    "additions": 38,
+    "author": "onel",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Adds species bias documentation across the transformers repository to help model authors and users recognize and address potential biases in language models. The updates include guidance on documenting bias categori\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44258",
-    "created_at": "2026-02-24T15:00:29Z",
-    "deletions": 8,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44311",
+    "created_at": "2026-02-27T00:02:49Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44258/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44258",
+    "files_url": "https://github.com/huggingface/transformers/pull/44311/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44311",
     "labels": [],
     "merged": false,
-    "number": 44258,
+    "number": 44311,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Ernie 4.5 VL Moe`] Change revision",
-    "updated_at": "2026-03-14T19:59:05Z"
+    "title": "Add species bias documentation to model cards and docs",
+    "updated_at": "2026-02-27T14:09:20Z"
   },
   {
-    "additions": 3,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? When post training using context parallelism, some processes may have their chunk of the sample input masked out leading to a NaN loss for that process. Using `nanmean` allows us to keep the real loss that isn't `Na\u2026",
-    "changed_files": 1,
+    "additions": 63,
+    "author": "onel",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Adds documentation for Pruna AI integration to the Transformers ecosystem, following the existing pattern used by vLLM and Unsloth integration docs. ## Changes - Created `docs/source/en/community_integrations/pruna.\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44257",
-    "created_at": "2026-02-24T14:56:42Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44310",
+    "created_at": "2026-02-27T00:00:31Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44257/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44257",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44310/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44310",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44257,
-    "review_comments_count": 5,
-    "state": "open",
-    "title": "use nanmean for aggregating loss",
-    "updated_at": "2026-02-25T17:01:08Z"
+    "number": 44310,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: Add Pruna AI integration documentation",
+    "updated_at": "2026-02-27T14:08:21Z"
   },
   {
-    "additions": 10,
-    "author": "albertvillanova",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fix CLI NameError: name 'TypeAdapter' is not defined: - Do not evaluate type annotations in CLI serve ### Problem Calling the CLI raises NameError: > NameError: name 'TypeAdapter' is not defined ```bash transformers --help ``` ```python Tr\u2026",
+    "additions": 129,
+    "author": "onel",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Adds SkyPilot deployment documentation to the DeepSpeed guide. The new section includes: - Introduction to SkyPilot as a unified framework for running AI workloads across clouds and Kubernetes - Complete example YAM\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44256",
-    "created_at": "2026-02-24T14:54:49Z",
-    "deletions": 9,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44309",
+    "created_at": "2026-02-26T22:44:41Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44256/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44256",
-    "labels": [],
-    "merged": true,
-    "number": 44256,
+    "files_url": "https://github.com/huggingface/transformers/pull/44309/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44309",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44309,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix CLI NameError: name 'TypeAdapter' is not defined",
-    "updated_at": "2026-03-12T07:57:14Z"
+    "title": "Add SkyPilot deployment documentation to DeepSpeed guide",
+    "updated_at": "2026-02-27T14:09:55Z"
   },
   {
-    "additions": 404,
-    "author": "itazap",
+    "additions": 5854,
+    "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "## What this PR does Given he different issues that were noticed by @hmellor on vLLM, we wanted to make sure we did not end up with crazy breaks. We ran a full test suite (code can be found in #44298) and the results showed 22 model conver\u2026",
-    "changed_files": 24,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44255",
-    "created_at": "2026-02-24T14:17:00Z",
-    "deletions": 205,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44308",
+    "created_at": "2026-02-26T21:26:05Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44255/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44255",
+    "files_url": "https://github.com/huggingface/transformers/pull/44308/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44308",
     "labels": [],
-    "merged": true,
-    "number": 44255,
-    "review_comments_count": 25,
+    "merged": false,
+    "number": 44308,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[vllm + v5 fix] handle TokenizersBackend fallback properly for v5",
-    "updated_at": "2026-03-23T11:07:37Z"
+    "title": "Codex/add sam3 litetext model to transformers fuvllg",
+    "updated_at": "2026-02-26T21:35:44Z"
   },
   {
-    "additions": 16,
-    "author": "mario-sanz",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully refle\u2026",
-    "changed_files": 2,
+    "additions": 7,
+    "author": "imstevenpmwork",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fixes a `TypeError: not all arguments converted during string formatting` caused by incorrectly passing `FutureWarning` as a second argument to `logger.warning_once()` in this file, introduced in https://git\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44254",
-    "created_at": "2026-02-24T13:54:30Z",
-    "deletions": 13,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44307",
+    "created_at": "2026-02-26T20:09:20Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44254/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44254",
-    "labels": [],
-    "merged": false,
-    "number": 44254,
-    "review_comments_count": 5,
+    "files_url": "https://github.com/huggingface/transformers/pull/44307/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44307",
+    "labels": [],
+    "merged": true,
+    "number": 44307,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix fast tokenizers overwriting custom `pre_tokenizer` from `tokenizer.json`",
-    "updated_at": "2026-02-26T08:45:56Z"
+    "title": "fix(modeling_attn_mask_utils): remove FutureWarning from logger.warning_once()",
+    "updated_at": "2026-02-26T21:29:01Z"
   },
   {
-    "additions": 9,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "`create_import_structure_from_path` does some redundant `os` calls, so I'm experimenting with changes to see if we can speed up loading a lot. Related to #44246",
-    "changed_files": 1,
+    "additions": 10,
+    "author": "jashshah999",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a crash in `PretrainedConfig.update_from_string()` when the input string contains entries without `=` or with multiple `=` signs. **`configuration_utils.py`** - The existing code `dict(x.split(\"=\") for x in up\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44253",
-    "created_at": "2026-02-24T13:03:40Z",
-    "deletions": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44306",
+    "created_at": "2026-02-26T20:02:10Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44253/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44253",
-    "labels": [],
-    "merged": true,
-    "number": 44253,
+    "files_url": "https://github.com/huggingface/transformers/pull/44306/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44306",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44306,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Speed create_import_structure up with os.scandir()",
-    "updated_at": "2026-03-10T12:49:42Z"
+    "title": "Fix ValueError crash in PretrainedConfig.update_from_string on malformed input",
+    "updated_at": "2026-02-27T14:25:03Z"
   },
   {
-    "additions": 718,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Deprecate timm backbone in favor of keeping all models within one `timm` folder, similar to other vision models. A backbone is just a variation of `PreTrainedModel`",
-    "changed_files": 61,
+    "additions": 7,
+    "author": "jashshah999",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a mutable default argument and two resource leaks: 1. **`integrations/tpu.py`** - `patched_optimizer_step` used `optimizer_args={}` as a default parameter. Mutable defaults are shared across calls, so any muta\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44252",
-    "created_at": "2026-02-24T13:00:59Z",
-    "deletions": 772,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44305",
+    "created_at": "2026-02-26T19:22:33Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44252/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44252",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44305/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44305",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44252,
-    "review_comments_count": 9,
-    "state": "open",
-    "title": "Timm unification continued",
-    "updated_at": "2026-02-26T13:35:44Z"
+    "number": 44305,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix mutable default in TPU optimizer and unclosed file handles",
+    "updated_at": "2026-02-27T14:21:58Z"
   },
   {
-    "additions": 1951,
-    "author": "Sai-Suraj-27",
+    "additions": 151,
+    "author": "adil-a",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Model Page: https://huggingface.co/jinaai/jina-embeddings-v3 Model Paper: https://huggingface.co/papers/2409.10173 Downloads last month > **5.3M** Completes Part of https://github.com/huggingface/transformers/issues\u2026",
-    "changed_files": 13,
+    "body_excerpt": "# What does this PR do? Adds NeMo Automodel under the community integrations tab. ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [contribut\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 29,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44251",
-    "created_at": "2026-02-24T12:56:24Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44304",
+    "created_at": "2026-02-26T17:57:16Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44251/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44251",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44304/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44304",
+    "labels": [],
     "merged": true,
-    "number": 44251,
-    "review_comments_count": 74,
+    "number": 44304,
+    "review_comments_count": 9,
     "state": "closed",
-    "title": "Add `Jina-Embeddings-V3` Model",
-    "updated_at": "2026-03-19T10:07:57Z"
+    "title": "docs: Add NeMo Automodel community integration docs",
+    "updated_at": "2026-03-03T16:51:48Z"
   },
   {
-    "additions": 5,
-    "author": "SunMarc",
+    "additions": 28,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes https://github.com/huggingface/transformers/pull/43806#discussion_r2834269455. We removed `self.report_to == \"all\"` functionality by mistake. Adding it back !",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Integrate the neuron device to TrainingArguments. It enables using the neuron device with the `Trainer` class.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44250",
-    "created_at": "2026-02-24T12:38:21Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44302",
+    "created_at": "2026-02-26T15:11:09Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44250/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44250",
+    "files_url": "https://github.com/huggingface/transformers/pull/44302/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44302",
     "labels": [],
     "merged": true,
-    "number": 44250,
+    "number": 44302,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix regression report_to \"all\"",
-    "updated_at": "2026-02-24T12:55:06Z"
+    "title": "Integrate the Neuron device to TrainingArguments",
+    "updated_at": "2026-03-05T15:11:00Z"
   },
   {
-    "additions": 9,
-    "author": "Ryan-J-MAX",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR fix? This PR adds backward compatibility for the deprecated `grouped_entities` parameter in the `TokenClassificationPipeline`. ## Problem The `grouped_entities` parameter was deprecated in favor of `aggregation_strateg\u2026",
+    "additions": 30,
+    "author": "likejazz",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? When fine-tuning Qwen3 with frameworks like TRL, `<think>` blocks are silently omitted from the token sequence, causing chain-of-thought reasoning data to be completely lost during training with no error or warning\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44249",
-    "created_at": "2026-02-24T10:48:54Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44301",
+    "created_at": "2026-02-26T14:30:24Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44249/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44249",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44301/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44301",
+    "labels": [],
     "merged": false,
-    "number": 44249,
+    "number": 44301,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: add backward compatibility for grouped_entities parameter",
-    "updated_at": "2026-02-24T12:31:26Z"
+    "title": "Fix: Qwen3 `<think>` blocks not written during fine-tuning (TRL)",
+    "updated_at": "2026-03-02T17:18:03Z"
   },
   {
-    "additions": 12,
-    "author": "yonigozlan",
+    "additions": 2539,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix backward compatibility with remote code for old processors not defining valid_kwargs (e.g. phi4) Cc @zucchini-nlp Fix `test_processor_override` for phi3v and phi4 in vllm @hmellor",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? The recursive feature is needed for me in https://github.com/huggingface/transformers/pull/44252 to allow timm backbone define its conversion only once. Also it currently allows to delete \"t5gemma2\" from conversion,\u2026",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44245",
-    "created_at": "2026-02-23T21:47:19Z",
-    "deletions": 4,
+    "comments_count": 44,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44300",
+    "created_at": "2026-02-26T14:09:59Z",
+    "deletions": 470,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44245/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44245",
+    "files_url": "https://github.com/huggingface/transformers/pull/44300/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44300",
     "labels": [],
     "merged": true,
-    "number": 44245,
-    "review_comments_count": 3,
+    "number": 44300,
+    "review_comments_count": 45,
     "state": "closed",
-    "title": "Fix image processors `from_dict` backward compatibility with old remote code",
-    "updated_at": "2026-02-24T15:17:37Z"
+    "title": "Dynamic weight conversion is recursive",
+    "updated_at": "2026-03-26T11:59:06Z"
   },
   {
-    "additions": 63,
-    "author": "thakoreh",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44242 Load balancing loss was not being added when `output_router_logits=False` in Mixtral models. ## Changes - Fixed loss calculation to include load balancing even when router logits are not output - Added test case ##\u2026",
-    "changed_files": 2,
+    "additions": 520,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, to allow for proper alignment with vllm/sglang Closes #44258",
+    "changed_files": 21,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44243",
-    "created_at": "2026-02-23T21:27:09Z",
-    "deletions": 5,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44299",
+    "created_at": "2026-02-26T12:50:03Z",
+    "deletions": 282,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44243/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44243",
+    "files_url": "https://github.com/huggingface/transformers/pull/44299/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44299",
+    "labels": [],
+    "merged": true,
+    "number": 44299,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": ":rotating_light: [`Ernie 4.5 VL Moe`] Fix up namings to vllm/sglang convention",
+    "updated_at": "2026-02-26T16:42:50Z"
+  },
+  {
+    "additions": 1145,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? A few issues we did not catch: - https://github.com/huggingface/transformers/blob/47b0e478f324b54f177ea7998a0791870fdd0324/src/transformers/convert_slow_tokenizer.py#L1314-L1315 missing from `GemmaTokenier` - SPM's\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44298",
+    "created_at": "2026-02-26T12:34:38Z",
+    "deletions": 29,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44298/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44298",
     "labels": [],
     "merged": false,
-    "number": 44243,
-    "review_comments_count": 0,
+    "number": 44298,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Auto detect wrong mapping models",
+    "updated_at": "2026-03-02T10:13:28Z"
+  },
+  {
+    "additions": 12302,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Needed for https://github.com/huggingface/transformers/pull/41250 to pass the docstring-checker in CI. Our basic checker doesn't handle well dataclasses so we can use `autodocstring`",
+    "changed_files": 512,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44296",
+    "created_at": "2026-02-26T10:29:21Z",
+    "deletions": 37860,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44296/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44296",
+    "labels": [],
+    "merged": true,
+    "number": 44296,
+    "review_comments_count": 11,
     "state": "closed",
-    "title": "fix: add load balancing loss when output_router_logits=False",
-    "updated_at": "2026-02-23T21:54:11Z"
+    "title": "Add auto-docstring on configs",
+    "updated_at": "2026-03-06T11:58:10Z"
   },
   {
-    "additions": 9,
-    "author": "yushiran",
+    "additions": 1,
+    "author": "mario-sanz",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Adds missing `-> bool`, `-> int`, and `-> str | None` return type annotations to public utility functions in `utils/generic.py`, making them consistent with the newer `is_timm_config_dict` and `is_timm_local_checkpoint` function\u2026",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44241",
-    "created_at": "2026-02-23T19:50:05Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44294",
+    "created_at": "2026-02-26T08:30:52Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44241/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44241",
+    "files_url": "https://github.com/huggingface/transformers/pull/44294/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44294",
     "labels": [],
     "merged": true,
-    "number": 44241,
+    "number": 44294,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: add missing return type annotations to type-checking utilities in generic.py",
-    "updated_at": "2026-02-24T13:27:11Z"
+    "title": "Fix: use `TokenizersBackend` for Olmo3 to preserve custom `pre_tokenizer`",
+    "updated_at": "2026-02-26T10:35:44Z"
   },
   {
-    "additions": 2,
-    "author": "tarekziade",
+    "additions": 13,
+    "author": "IlyasMoutawwakil",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Makes sure `find_bad_commit` always return the result `dict`",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44240",
-    "created_at": "2026-02-23T19:12:49Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44293",
+    "created_at": "2026-02-26T08:25:23Z",
+    "deletions": 384,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44240/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44240",
+    "files_url": "https://github.com/huggingface/transformers/pull/44293/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44293",
     "labels": [],
     "merged": true,
-    "number": 44240,
-    "review_comments_count": 0,
+    "number": 44293,
+    "review_comments_count": 7,
     "state": "closed",
-    "title": "Fix return value - fixes #44238",
-    "updated_at": "2026-02-24T13:02:59Z"
+    "title": "Strict export cleanup",
+    "updated_at": "2026-03-02T09:36:19Z"
   },
   {
-    "additions": 253,
+    "additions": 548,
     "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "part 2 of refactoring the training docs adds new dedicated guide to callbacks and data collators todo: - [x] backlink to `## Next steps` in `trainer.md` once https://github.com/huggingface/transformers/pull/44185 is merged",
-    "changed_files": 7,
+    "body_excerpt": "- adds `trainer_recipes.md` to show how to use other practical `Trainer` features outside of the basic training loop - updates hyperparam search docs - updates `optimizers.md` with how to customize it (prebuilt instances, passing a class +\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44239",
-    "created_at": "2026-02-23T18:54:55Z",
-    "deletions": 47,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44290",
+    "created_at": "2026-02-26T01:02:15Z",
+    "deletions": 210,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44239/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44239",
+    "files_url": "https://github.com/huggingface/transformers/pull/44290/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44290",
     "labels": [],
     "merged": true,
-    "number": 44239,
-    "review_comments_count": 10,
+    "number": 44290,
+    "review_comments_count": 17,
     "state": "closed",
-    "title": "[docs] callbacks and collators",
-    "updated_at": "2026-02-24T22:12:46Z"
+    "title": "[docs] optimizers, hyperparam search, training features",
+    "updated_at": "2026-04-09T20:00:53Z"
   },
   {
-    "additions": 1,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? small nit but will be misleading if not fixed",
-    "changed_files": 1,
+    "additions": 8,
+    "author": "haosenwang1018",
+    "author_association": "NONE",
+    "body_excerpt": "Replace bare except clauses with except Exception.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44237",
-    "created_at": "2026-02-23T17:52:17Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44289",
+    "created_at": "2026-02-26T00:58:35Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44237/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44237",
+    "files_url": "https://github.com/huggingface/transformers/pull/44289/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44289",
     "labels": [],
-    "merged": true,
-    "number": 44237,
+    "merged": false,
+    "number": 44289,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[mimi] nit",
-    "updated_at": "2026-02-24T15:43:55Z"
+    "title": "fix: replace 8 bare except clauses with except Exception",
+    "updated_at": "2026-02-26T12:52:48Z"
   },
   {
-    "additions": 109,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/43847 When using zero3 + from_config, the model was incorrectly initialized as we were not gathering the params. Added a test also. cc @tohtana",
-    "changed_files": 5,
+    "additions": 1,
+    "author": "somAzzz",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? When loading the new Qwen 3.5 models (e.g., 'Qwen/Qwen3.5-35B-A3B') using the 'transformers' (5.3.0.dev0), the initialization crashes with a 'TypeError' . **Error Traceback Context:** (APIServer pid=98544) File \"...\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44236",
-    "created_at": "2026-02-23T17:20:01Z",
-    "deletions": 3,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44288",
+    "created_at": "2026-02-25T22:43:41Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44236/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44236",
+    "files_url": "https://github.com/huggingface/transformers/pull/44288/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44288",
     "labels": [],
-    "merged": true,
-    "number": 44236,
-    "review_comments_count": 0,
+    "merged": false,
+    "number": 44288,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix zero3 init config",
-    "updated_at": "2026-02-27T11:36:19Z"
+    "title": "Fix TypeError in modeling_rope_utils.py when ignore_keys_at_rope_vali\u2026",
+    "updated_at": "2026-02-26T20:09:07Z"
   },
   {
-    "additions": 1,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "UPDATE TO: https://github.com/huggingface/transformers/pull/44179/changes Models with incorrect tokenizer_class in tokenization_config.json that should use TokenziersBackend",
-    "changed_files": 1,
+    "additions": 11,
+    "author": "jashshah999",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes mutable default arguments and unclosed file handles across several files. **Mutable defaults** (can cause shared state across calls): - `debug_utils.py`: `DebugUnderflowOverflow.__init__` `trace_batch_nums=[]`\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44235",
-    "created_at": "2026-02-23T17:06:54Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44287",
+    "created_at": "2026-02-25T22:23:20Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44235/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44235",
+    "files_url": "https://github.com/huggingface/transformers/pull/44287/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44287",
     "labels": [],
     "merged": true,
-    "number": 44235,
+    "number": 44287,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "update fuyu tokenizer class",
-    "updated_at": "2026-02-23T17:36:22Z"
+    "title": "Fix mutable default arguments and resource leaks",
+    "updated_at": "2026-03-02T15:17:25Z"
   },
   {
-    "additions": 249,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "Cc @zucchini-nlp",
+    "additions": 31,
+    "author": "kathrynle20",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds on to the Fouroversix integration by adding support for quantized models such as the gpt-oss model by adding weight conversions and an additional config argument. Reference: https://github.com/huggingfa\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44234",
-    "created_at": "2026-02-23T17:03:05Z",
-    "deletions": 55,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44286",
+    "created_at": "2026-02-25T22:15:15Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44234/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44234",
+    "files_url": "https://github.com/huggingface/transformers/pull/44286/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44286",
     "labels": [],
     "merged": true,
-    "number": 44234,
-    "review_comments_count": 2,
+    "number": 44286,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "Add processing tests for phi4 multimodal",
-    "updated_at": "2026-02-23T22:08:11Z"
+    "title": "Add future model support for Fouroversix",
+    "updated_at": "2026-03-04T16:28:13Z"
   },
   {
-    "additions": 219,
-    "author": "tarekziade",
+    "additions": 3484,
+    "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "Extends `ty` coverage to `src/transformers/generation` - Added a dedicated type-check wrapper script: `utils/check_types.py`. - Updated `Makefile` to run `ty` checks through the wrapper in both `style` and `check-repo`. - merged all typing\u2026",
-    "changed_files": 15,
+    "body_excerpt": "# What does this PR do? This PR adds the VidEoMT model, as described in [VidEoMT: Your ViT is Secretly Also a Video Segmentation Model](https://huggingface.co/papers/2602.17807). Gradio demo (running on ZeroGPU): https://huggingface.co/spa\u2026",
+    "changed_files": 17,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44233",
-    "created_at": "2026-02-23T16:23:24Z",
-    "deletions": 101,
+    "comments_count": 23,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44285",
+    "created_at": "2026-02-25T19:24:39Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44233/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44233",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44285/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44285",
+    "labels": [
+      "New model"
+    ],
     "merged": true,
-    "number": 44233,
-    "review_comments_count": 33,
+    "number": 44285,
+    "review_comments_count": 57,
     "state": "closed",
-    "title": "chore(typing): Add type checking to `src/transformers/generation`",
-    "updated_at": "2026-03-04T17:24:37Z"
+    "title": "Add VidEoMT",
+    "updated_at": "2026-03-25T17:05:47Z"
+  },
+  {
+    "additions": 388,
+    "author": "paipeline",
+    "author_association": "NONE",
+    "body_excerpt": "## Description Fixes #44242 where Mixtral models do not compute auxiliary load balancing loss when `output_router_logits=False`, even when `router_aux_loss_coef > 0`. ## Problem According to the [Mixtral documentation](https://huggingface.\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44284",
+    "created_at": "2026-02-25T18:38:15Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44284/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44284",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44284,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix Mixtral auxiliary loss computation when output_router_logits=False",
+    "updated_at": "2026-02-26T12:41:46Z"
   },
   {
-    "additions": 11,
-    "author": "tarekziade",
+    "additions": 1,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? per https://code.claude.com/docs/en/claude-code-on-the-web#best-practices `CLAUDE.md` can alias directly into `AGENTS.md`",
-    "changed_files": 2,
+    "body_excerpt": "Accidentally caused by #43325, wrong naming --> modular doesn't properly convert some files (e.g. kyutai) Also fixes red CI on main",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44232",
-    "created_at": "2026-02-23T16:10:15Z",
-    "deletions": 109,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44283",
+    "created_at": "2026-02-25T18:33:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44232/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44232",
+    "files_url": "https://github.com/huggingface/transformers/pull/44283/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44283",
     "labels": [],
     "merged": true,
-    "number": 44232,
-    "review_comments_count": 2,
+    "number": 44283,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: added CLAUDE.md alias",
-    "updated_at": "2026-02-24T14:48:36Z"
+    "title": "[`Modular`] Fix file type regression",
+    "updated_at": "2026-02-25T20:04:41Z"
   },
   {
-    "additions": 413,
-    "author": "IlyasMoutawwakil",
+    "additions": 5,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 5,
+    "body_excerpt": "Response schema save-loading was broken in #40936, this PR restores it! I did most of this in #42300 but missed an issue with loading/saving.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44231",
-    "created_at": "2026-02-23T15:45:47Z",
-    "deletions": 578,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44282",
+    "created_at": "2026-02-25T17:57:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44231/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44231",
+    "files_url": "https://github.com/huggingface/transformers/pull/44282/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44282",
     "labels": [],
     "merged": true,
-    "number": 44231,
-    "review_comments_count": 18,
+    "number": 44282,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Performance] FP8 Grouped and Batched Matmuls",
-    "updated_at": "2026-03-11T08:51:02Z"
+    "title": "Restore response_schema saving-loading",
+    "updated_at": "2026-02-25T18:27:22Z"
   },
   {
-    "additions": 4,
-    "author": "alvarobartt",
+    "additions": 1,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds the missing backtick (`) on the `AnyToAnyPipeline.__call__` docstrings, as those were showing as in the screenshot below instead. <img width=\"1023\" height=\"400\" alt=\"image\" src=\"https://github.com/user-\u2026",
+    "body_excerpt": "# What does this PR do? Its a very small fix for #44062",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44229",
-    "created_at": "2026-02-23T15:25:47Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44281",
+    "created_at": "2026-02-25T16:28:37Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44229/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44229",
+    "files_url": "https://github.com/huggingface/transformers/pull/44281/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44281",
     "labels": [],
     "merged": true,
-    "number": 44229,
+    "number": 44281,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add missing backtick in `AnyToAnyPipeline.__call__` docstring",
-    "updated_at": "2026-02-23T19:21:08Z"
+    "title": "Fix special token maps BC",
+    "updated_at": "2026-02-26T10:34:17Z"
   },
   {
-    "additions": 35,
-    "author": "JonoLF",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 614,
+    "author": "RishabhMehra",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? - Adds an opt-in use_fast_grouping flag to TokenClassificationPipeline to enable a NumPy-vectorised BIO grouping path (~5\u00d7 faster on long sequences) while keeping the legacy path as default. - Improves correctness:\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44228",
-    "created_at": "2026-02-23T15:09:05Z",
-    "deletions": 7,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44278",
+    "created_at": "2026-02-25T12:49:56Z",
+    "deletions": 63,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44228/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44228",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44278/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44278",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44228,
+    "number": 44278,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[Quantisation] account for nested tensors from quantisers",
-    "updated_at": "2026-03-17T11:57:53Z"
+    "state": "closed",
+    "title": "[FEAT] Pipelines - Faster group_entities",
+    "updated_at": "2026-02-25T13:54:58Z"
   },
   {
-    "additions": 21,
-    "author": "remi-or",
+    "additions": 105,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR adds a logging message when infering the behavior of use async and fixes an error when evicting a graph from the graph buffer.",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? This patch makes the GLM-ASR doc example runnable by using `runnables` - see https://github.com/huggingface/doc-builder/blob/main/docs/runnable-code-blocks.md",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44227",
-    "created_at": "2026-02-23T14:53:53Z",
-    "deletions": 13,
+    "comments_count": 36,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44277",
+    "created_at": "2026-02-25T08:49:20Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44227/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44227",
+    "files_url": "https://github.com/huggingface/transformers/pull/44277/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44277",
     "labels": [],
     "merged": true,
-    "number": 44227,
-    "review_comments_count": 9,
+    "number": 44277,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "[CB] Small fixes",
-    "updated_at": "2026-03-03T13:40:10Z"
+    "title": "Use doc-builder runnable example for GLM-ASR",
+    "updated_at": "2026-04-02T16:16:55Z"
   },
   {
-    "additions": 86,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. It looks like some models (xlnet and kosmos2_5) and most audio models sometimes rely on the full previous input_ids to prepare inputs. Note that this cannot be compatible with restarting generation\u2026",
-    "changed_files": 12,
+    "additions": 0,
+    "author": "vishalpatil-45",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR addresses the performance regression where `import transformers` takes ~3.5s. The issue was caused by eager imports of heavy backend libraries (like torch/numpy) during the initial module load. By moving the\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44226",
-    "created_at": "2026-02-23T13:27:23Z",
-    "deletions": 66,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44275",
+    "created_at": "2026-02-25T08:27:32Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44226/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44226",
-    "labels": [],
-    "merged": true,
-    "number": 44226,
-    "review_comments_count": 17,
+    "files_url": "https://github.com/huggingface/transformers/pull/44275/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44275",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44275,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[generate] Always pass full input_ids in `prepare_inputs_for_generation`",
-    "updated_at": "2026-02-24T10:45:49Z"
+    "title": "[Fix] Restore lazy loading to improve import performance (#44273)",
+    "updated_at": "2026-02-25T20:37:18Z"
   },
   {
-    "additions": 169,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, we weren't running these test for some time because they were being filtered into `non-model` tests. But `non-model` doesn't run tests that are marked as generation, so I moved it back to `generation`\u2026",
-    "changed_files": 5,
+    "additions": 559,
+    "author": "paipeline",
+    "author_association": "NONE",
+    "body_excerpt": "## Description Fixes #44242 This PR resolves an issue where the auxiliary load balancing loss was not computed when `output_router_logits=False`, even when `router_aux_loss_coef != 0`. ## Problem The auxiliary loss computation was incorrec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44225",
-    "created_at": "2026-02-23T12:09:40Z",
-    "deletions": 270,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44274",
+    "created_at": "2026-02-25T06:38:02Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44225/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44225",
-    "labels": [],
-    "merged": true,
-    "number": 44225,
-    "review_comments_count": 18,
+    "files_url": "https://github.com/huggingface/transformers/pull/44274/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44274",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44274,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix generation integration tests",
-    "updated_at": "2026-02-25T09:19:39Z"
+    "title": "Fix auxiliary load balancing loss computation when output_router_logits=False",
+    "updated_at": "2026-02-25T13:36:03Z"
   },
   {
-    "additions": 5,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Small fixes after https://github.com/huggingface/transformers/pull/44130. See https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/8785954cca2fdca181de0b9567059471bcadb959/2026-02-21/ci_resu\u2026",
-    "changed_files": 3,
+    "additions": 1,
+    "author": "hangjun-ezra",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a `TypeError: unsupported operand type(s) for |: 'list' and 'set'` in `RotaryEmbeddingConfigMixin.convert_rope_params_to_dict` when `ignore_keys_at_rope_validation` is a `list` instead of a `set`. ### Root ca\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44224",
-    "created_at": "2026-02-23T10:48:19Z",
-    "deletions": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44272",
+    "created_at": "2026-02-25T03:52:04Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44224/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44224",
+    "files_url": "https://github.com/huggingface/transformers/pull/44272/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44272",
     "labels": [],
-    "merged": false,
-    "number": 44224,
+    "merged": true,
+    "number": 44272,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Small fixes",
-    "updated_at": "2026-02-24T10:06:14Z"
+    "title": "Fix TypeError in convert_rope_params_to_dict when ignore_keys is a list",
+    "updated_at": "2026-02-25T14:38:36Z"
+  },
+  {
+    "additions": 1272,
+    "author": "balak4",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - Add GreedyLR, a metric-based adaptive learning rate scheduler that adjusts the learning rate during training based on the current loss - Based on [\"Dynamic Learning Rate Scheduling based on Loss Changes Leads to Faster Converg\u2026",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44271",
+    "created_at": "2026-02-25T01:40:57Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44271/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44271",
+    "labels": [],
+    "merged": true,
+    "number": 44271,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Add GreedyLR adaptive learning rate scheduler",
+    "updated_at": "2026-03-18T18:45:46Z"
   },
   {
-    "additions": 1,
-    "author": "albertvillanova",
+    "additions": 88,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "Fix type of `TrainingArguments.logging_steps`. This PR makes a minor update to the `TrainingArguments` class, so `logging_steps` parameter accepts both integers and floats, rather than only floats. Note these are the expected types in the\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? A lot of ProcessorsKwargs have incorrect/unspecified type hints in their ProcessorsKwargs TypedDict for their images_kwargs attribute. Functionnaly, this did not cause issues as \"_merge_kwargs\" automatically picks u\u2026",
+    "changed_files": 44,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44223",
-    "created_at": "2026-02-23T08:50:04Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44270",
+    "created_at": "2026-02-25T00:11:31Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44223/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44223",
+    "files_url": "https://github.com/huggingface/transformers/pull/44270/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44270",
     "labels": [],
     "merged": false,
-    "number": 44223,
+    "number": 44270,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix type of TrainingArguments.logging_steps",
-    "updated_at": "2026-02-23T09:08:18Z"
+    "state": "open",
+    "title": "Add correct typing to custom images_kwargs in ProcessorsKwargs",
+    "updated_at": "2026-02-25T01:12:06Z"
   },
   {
-    "additions": 1,
-    "author": "matisgagneux21",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - fix a typo in the Italian add-new-model guide: `docstirng` -> `docstring`. ## Why Small docs quality fix that avoids confusion for readers following the contribution guide.",
-    "changed_files": 1,
+    "additions": 30,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This is a follow-up to https://github.com/huggingface/transformers/pull/43748, and will allow to have clickable links to the full modality kwargs when present in the docstring of a processor or image processor Cc @s\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44219",
-    "created_at": "2026-02-23T00:43:59Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44269",
+    "created_at": "2026-02-25T00:05:47Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44219/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44219",
+    "files_url": "https://github.com/huggingface/transformers/pull/44269/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44269",
     "labels": [],
     "merged": true,
-    "number": 44219,
+    "number": 44269,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Docs(it): fix typo in docstring wording",
-    "updated_at": "2026-02-23T15:04:51Z"
+    "title": "Add `ProcessingKwargs` `ImagesKwargs` etc. to docs",
+    "updated_at": "2026-02-27T19:03:15Z"
   },
   {
-    "additions": 1,
-    "author": "matisgagneux21",
+    "additions": 5,
+    "author": "ethanknights",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - fix a typo in the Italian migration guide install command: - `stentencepiece` -> `sentencepiece` ## Why The current command fails if copied as-is. This makes the installation snippet runnable for users reading the Italian docs.",
+    "body_excerpt": "# What does this PR do? Some improvements to the `trainer.py` docs. ## Before submitting - [x] This PR fixes a typo or improves the docs. ## Who can review? Documentation: @stevhliu",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44218",
-    "created_at": "2026-02-23T00:32:49Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44268",
+    "created_at": "2026-02-24T23:20:16Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44218/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44218",
+    "files_url": "https://github.com/huggingface/transformers/pull/44268/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44268",
     "labels": [],
     "merged": true,
-    "number": 44218,
+    "number": 44268,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Docs(it): fix typo in sentencepiece install command",
-    "updated_at": "2026-02-23T15:05:17Z"
+    "title": "chore: fixes in `Trainer` class docs (`compute_loss` & `hyperparameter_search`)",
+    "updated_at": "2026-02-26T00:50:23Z"
   },
   {
-    "additions": 1,
-    "author": "matisgagneux21",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - Update the Italian migration guide to avoid pointing users to the deprecated `grouped_entities` flag. - Clarify that `aggregation_strategy` is the current option (with a note that it was previously `grouped_entities`). ## Why\u2026",
+    "additions": 4,
+    "author": "manavshrivastavagit",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Update the `DocumentQuestionAnsweringPipeline` docstring to explicitly mention the task summary in the Transformers documentation. - Remove the stale TODO comment now that document question answering is covered in the task sum\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44217",
-    "created_at": "2026-02-23T00:10:48Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44267",
+    "created_at": "2026-02-24T20:35:18Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44217/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44217",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44267/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44267",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44217,
+    "number": 44267,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Docs(it): update deprecated grouped_entities reference",
-    "updated_at": "2026-02-23T14:56:19Z"
+    "title": "Docs: point DocumentQuestionAnswering pipeline to task summary",
+    "updated_at": "2026-02-25T13:34:48Z"
   },
   {
-    "additions": 13,
-    "author": "nikste",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Automated fix for #44016: Syntax error in Transformer section 3 (Transformers, what can they do?) notebook Fixes #44016 ## What does this PR do? This PR addresses issue #44016 by implementing the fix described in the issue. ---\u2026",
-    "changed_files": 1,
+    "additions": 27,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 **Reasoning:** The impact of this fix goes beyond `Mask2Former` and `DeformableDetr` and should fix any model that uses `torch_compilable_check`. Most use\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44216",
-    "created_at": "2026-02-22T23:40:56Z",
-    "deletions": 0,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44266",
+    "created_at": "2026-02-24T20:02:06Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44216/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44216",
+    "files_url": "https://github.com/huggingface/transformers/pull/44266/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44266",
     "labels": [],
-    "merged": false,
-    "number": 44216,
+    "merged": true,
+    "number": 44266,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix #44016: Syntax error in Transformer section 3 (Transformers, what can they do?) notebook",
-    "updated_at": "2026-02-22T23:50:57Z"
+    "title": "fix(utils): Make torch_compilable_check compatible with torch.export strict mode",
+    "updated_at": "2026-02-26T09:42:47Z"
   },
   {
-    "additions": 187,
-    "author": "jmriosal",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? Add sequence classification capabilities to the family of Granite models (Granite, GraniteMoe, GraniteMoeHybrid, and GraniteMoeShared). Fixes #44214, #35720 ## Why The Granite models currently only have the base mod\u2026",
-    "changed_files": 17,
+    "additions": 90,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, WIP --> needs a test",
+    "changed_files": 36,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44215",
-    "created_at": "2026-02-22T23:24:43Z",
-    "deletions": 13,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44215/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44215",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44264",
+    "created_at": "2026-02-24T18:06:58Z",
+    "deletions": 210,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44264/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44264",
     "labels": [],
     "merged": false,
-    "number": 44215,
-    "review_comments_count": 0,
+    "number": 44264,
+    "review_comments_count": 3,
     "state": "open",
-    "title": "Add sequence classification capability to Granite models",
-    "updated_at": "2026-02-24T20:39:37Z"
+    "title": "[`Moe`] Enable aux loss automatically when in training + coef is not 0",
+    "updated_at": "2026-02-25T18:53:20Z"
   },
   {
-    "additions": 70,
-    "author": "parthchopra07",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? This PR refreshes the BEiT model documentation to align it with the current Transformers vision docs style and features. It updates the usage examples, clarifies configuration details, and improves the resources sec\u2026",
-    "changed_files": 1,
+    "additions": 5882,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR refactor the common tests that we have in Trainer. I've mainly did the following: - Split the tests that we have in `test_trainer.py` into multiple files. - Fix common tests that were failing in the CI",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44213",
-    "created_at": "2026-02-22T18:32:16Z",
-    "deletions": 29,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44260",
+    "created_at": "2026-02-24T15:51:11Z",
+    "deletions": 6147,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44213/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44213",
+    "files_url": "https://github.com/huggingface/transformers/pull/44260/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44260",
     "labels": [],
-    "merged": false,
-    "number": 44213,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44260,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Update BEiT model card",
-    "updated_at": "2026-02-28T14:33:57Z"
+    "title": "Update common tests Trainer",
+    "updated_at": "2026-02-27T17:31:59Z"
   },
   {
-    "additions": 1,
-    "author": "alexandercarruthers",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Updates a broken link in the summarization guide. https://huggingface.co/docs/transformers/tasks/summarization https://huggingface.co/billsum/datasets results in a 404. New URL is https://huggingface.co/datasets/Fis\u2026",
-    "changed_files": 1,
+    "additions": 1830,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? This PR supersedes #43985 to replace the dataset/sampler/dataloader with a data producer that should allow us to more easily get to the next step of async training for RL. <!-- Congratulations! You've made it this f\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44212",
-    "created_at": "2026-02-22T18:02:43Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44212/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44212",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44259",
+    "created_at": "2026-02-24T15:01:56Z",
+    "deletions": 59,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44259/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44259",
     "labels": [],
-    "merged": true,
-    "number": 44212,
+    "merged": false,
+    "number": 44259,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Update 404ing BillSum dataset URL on Summarization Task guide",
-    "updated_at": "2026-02-23T14:46:11Z"
+    "state": "open",
+    "title": "Async data producer",
+    "updated_at": "2026-02-26T19:57:43Z"
   },
   {
-    "additions": 10,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## Fix for #44016 The `grouped_entities` parameter in `TokenClassificationPipeline._sanitize_parameters` was removed without a deprecation period, causing a `TypeError` when users pass `grouped_entities=True` to the `pipeline()` call (as s\u2026",
-    "changed_files": 1,
+    "additions": 8,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "The old one has a merge conflict and it was easier to just mirror into a new branch / PR Note that this only affects the local big tests that I used to run on the A100s locally; not to be run with the CI (too big)",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44211",
-    "created_at": "2026-02-22T17:04:50Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44258",
+    "created_at": "2026-02-24T15:00:29Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44211/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44211",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44258/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44258",
+    "labels": [],
     "merged": false,
-    "number": 44211,
+    "number": 44258,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add backward compatibility for deprecated grouped_entities parameter",
-    "updated_at": "2026-02-23T16:26:02Z"
+    "title": "[`Ernie 4.5 VL Moe`] Change revision",
+    "updated_at": "2026-03-14T19:59:05Z"
   },
   {
-    "additions": 1,
-    "author": "nightcityblade",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44206 ## Problem PR #43769 (\"Add Voxtral Realtime\") added a `center` parameter to `LasrFeatureExtractor.__call__()` and passed it to `_torch_extract_fbank_features()`, but that method does not accept it. This causes a `TypeError` on\u2026",
+    "additions": 3,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? When post training using context parallelism, some processes may have their chunk of the sample input masked out leading to a NaN loss for that process. Using `nanmean` allows us to keep the real loss that isn't `Na\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44210",
-    "created_at": "2026-02-22T16:06:16Z",
-    "deletions": 4,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44257",
+    "created_at": "2026-02-24T14:56:42Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44210/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44210",
+    "files_url": "https://github.com/huggingface/transformers/pull/44257/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44257",
     "labels": [],
     "merged": false,
-    "number": 44210,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(lasr): remove spurious center arg from _torch_extract_fbank_features call",
-    "updated_at": "2026-02-23T14:32:41Z"
+    "number": 44257,
+    "review_comments_count": 5,
+    "state": "open",
+    "title": "use nanmean for aggregating loss",
+    "updated_at": "2026-02-25T17:01:08Z"
   },
   {
-    "additions": 197,
-    "author": "paipeline",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? This PR fixes a critical bug in LayoutLMv2Tokenizer where passing `word_labels` for NER token classification tasks would crash with `AttributeError`. The issue was that `word_ids` and `sequence_ids` were being acce\u2026",
-    "changed_files": 3,
+    "additions": 10,
+    "author": "albertvillanova",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fix CLI NameError: name 'TypeAdapter' is not defined: - Do not evaluate type annotations in CLI serve ### Problem Calling the CLI raises NameError: > NameError: name 'TypeAdapter' is not defined ```bash transformers --help ``` ```python Tr\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44209",
-    "created_at": "2026-02-22T14:37:25Z",
-    "deletions": 3,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44256",
+    "created_at": "2026-02-24T14:54:49Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44209/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44209",
+    "files_url": "https://github.com/huggingface/transformers/pull/44256/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44256",
     "labels": [],
-    "merged": false,
-    "number": 44209,
+    "merged": true,
+    "number": 44256,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix LayoutLMv2Tokenizer NER crashes with word_labels",
-    "updated_at": "2026-02-23T10:30:26Z"
+    "title": "Fix CLI NameError: name 'TypeAdapter' is not defined",
+    "updated_at": "2026-03-12T07:57:14Z"
   },
   {
-    "additions": 1,
-    "author": "ainergiz",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR fixes a LASR regression introduced in `#43769` (released in `v5.2.0`). `LasrFeatureExtractor.__call__` passes `center` into `_torch_extract_fbank_features(...)`, but `_torch_extract_fbank_features` did not a\u2026",
-    "changed_files": 3,
+    "additions": 404,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "## What this PR does Given he different issues that were noticed by @hmellor on vLLM, we wanted to make sure we did not end up with crazy breaks. We ran a full test suite (code can be found in #44298) and the results showed 22 model conver\u2026",
+    "changed_files": 24,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44207",
-    "created_at": "2026-02-21T20:56:49Z",
-    "deletions": 70,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44255",
+    "created_at": "2026-02-24T14:17:00Z",
+    "deletions": 205,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44207/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44207",
+    "files_url": "https://github.com/huggingface/transformers/pull/44255/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44255",
     "labels": [],
     "merged": true,
-    "number": 44207,
-    "review_comments_count": 0,
+    "number": 44255,
+    "review_comments_count": 25,
     "state": "closed",
-    "title": "Fix LASR feature extractor regression from invalid center argument",
-    "updated_at": "2026-02-23T10:01:35Z"
+    "title": "[vllm + v5 fix] handle TokenizersBackend fallback properly for v5",
+    "updated_at": "2026-03-23T11:07:37Z"
   },
   {
-    "additions": 1,
-    "author": "nightcityblade",
+    "additions": 16,
+    "author": "mario-sanz",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44186 `LayoutLMv2Tokenizer.__init__` passes `only_label_first_subword` to `super().__init__()` but never stores it as `self.only_label_first_subword`. This causes an `AttributeError` when `word_labels` is pa\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully refle\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44204",
-    "created_at": "2026-02-21T16:06:46Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44254",
+    "created_at": "2026-02-24T13:54:30Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44204/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44204",
+    "files_url": "https://github.com/huggingface/transformers/pull/44254/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44254",
     "labels": [],
     "merged": false,
-    "number": 44204,
-    "review_comments_count": 0,
+    "number": 44254,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "fix(layoutlmv2): store only_label_first_subword attribute in tokenizer",
-    "updated_at": "2026-02-23T10:30:19Z"
+    "title": "Fix fast tokenizers overwriting custom `pre_tokenizer` from `tokenizer.json`",
+    "updated_at": "2026-02-26T08:45:56Z"
   },
   {
-    "additions": 22,
-    "author": "nightcityblade",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44075 `_get_sgd()`, `_get_adagrad()`, and `_get_rmsprop()` in `trainer_optimizer.py` only returned `ctx.optimizer_kwargs` (which contains just `lr`), completely ignoring `ctx.optim_args`. This meant that parameters specif\u2026",
+    "additions": 9,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "`create_import_structure_from_path` does some redundant `os` calls, so I'm experimenting with changes to see if we can speed up loading a lot. Related to #44246",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44203",
-    "created_at": "2026-02-21T15:12:17Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44253",
+    "created_at": "2026-02-24T13:03:40Z",
+    "deletions": 15,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44203/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44203",
+    "files_url": "https://github.com/huggingface/transformers/pull/44253/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44253",
     "labels": [],
     "merged": true,
-    "number": 44203,
+    "number": 44253,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(trainer): pass optim_args to SGD, Adagrad, and RMSprop optimizers",
-    "updated_at": "2026-02-25T16:04:20Z"
+    "title": "Speed create_import_structure up with os.scandir()",
+    "updated_at": "2026-03-10T12:49:42Z"
   },
   {
-    "additions": 63,
-    "author": "GS-GOAT",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? When `torch.compile` is used, [_ignore_bidirectional_mask_sdpa](cci:1://file:///c:/Users/BIT/Desktop/proj/gitrepo_clones/transformers/src/transformers/masking_utils.py:303:0-338:16) behaves differently than in eager\u2026",
-    "changed_files": 2,
+    "additions": 718,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Deprecate timm backbone in favor of keeping all models within one `timm` folder, similar to other vision models. A backbone is just a variation of `PreTrainedModel`",
+    "changed_files": 61,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44202",
-    "created_at": "2026-02-21T13:45:15Z",
-    "deletions": 1,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44252",
+    "created_at": "2026-02-24T13:00:59Z",
+    "deletions": 772,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44202/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44202",
+    "files_url": "https://github.com/huggingface/transformers/pull/44252/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44252",
     "labels": [],
     "merged": false,
-    "number": 44202,
-    "review_comments_count": 0,
+    "number": 44252,
+    "review_comments_count": 9,
+    "state": "open",
+    "title": "Timm unification continued",
+    "updated_at": "2026-02-26T13:35:44Z"
+  },
+  {
+    "additions": 1951,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Model Page: https://huggingface.co/jinaai/jina-embeddings-v3 Model Paper: https://huggingface.co/papers/2409.10173 Downloads last month > **5.3M** Completes Part of https://github.com/huggingface/transformers/issues\u2026",
+    "changed_files": 13,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 29,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44251",
+    "created_at": "2026-02-24T12:56:24Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44251/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44251",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 44251,
+    "review_comments_count": 74,
     "state": "closed",
-    "title": "Fix: bidirectional mask skip when attention dropout is active (#44188)",
-    "updated_at": "2026-03-09T10:31:41Z"
+    "title": "Add `Jina-Embeddings-V3` Model",
+    "updated_at": "2026-03-19T10:07:57Z"
   },
   {
-    "additions": 18,
-    "author": "tarekziade",
+    "additions": 5,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? main is currently failing with ``` FAILED tests/models/higgs_audio_v2/test_modeling_higgs_audio_v2.py::HiggsAudioV2ModelTest::test_generate_compilation_all_outputs - AssertionError: Lists differ: [torch.Size([2, 15,\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? This PR fixes https://github.com/huggingface/transformers/pull/43806#discussion_r2834269455. We removed `self.report_to == \"all\"` functionality by mistake. Adding it back !",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44201",
-    "created_at": "2026-02-21T10:03:41Z",
-    "deletions": 13,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44250",
+    "created_at": "2026-02-24T12:38:21Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44201/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44201",
+    "files_url": "https://github.com/huggingface/transformers/pull/44250/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44250",
     "labels": [],
     "merged": true,
-    "number": 44201,
-    "review_comments_count": 3,
+    "number": 44250,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: HiggsAudioV2 cached decode inputs in compiled generation",
-    "updated_at": "2026-02-23T12:39:19Z"
+    "title": "fix regression report_to \"all\"",
+    "updated_at": "2026-02-24T12:55:06Z"
   },
   {
-    "additions": 3,
-    "author": "pragnyanramtha",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #43782 The `weights_only` parameter passed to `from_pretrained()` was not being forwarded to `load_state_dict()` when loading `.bin` checkpoint files in the non-DeepSpeed code path. This caused `weights_only` to always default to `Tr\u2026",
+    "additions": 9,
+    "author": "Ryan-J-MAX",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR fix? This PR adds backward compatibility for the deprecated `grouped_entities` parameter in the `TokenClassificationPipeline`. ## Problem The `grouped_entities` parameter was deprecated in favor of `aggregation_strateg\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44200",
-    "created_at": "2026-02-21T06:24:17Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44249",
+    "created_at": "2026-02-24T10:48:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44200/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44200",
+    "files_url": "https://github.com/huggingface/transformers/pull/44249/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44249",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44200,
+    "number": 44249,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: propagate `weights_only` param to `load_state_dict` in .bin loading path (#43782)",
-    "updated_at": "2026-02-23T14:20:12Z"
+    "title": "fix: add backward compatibility for grouped_entities parameter",
+    "updated_at": "2026-02-24T12:31:26Z"
   },
   {
-    "additions": 3,
-    "author": "gowthamr-tech",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR fixes an issue in `run_image_classification_no_trainer.py` where the script always loaded `dataset_name` (e.g., CIFAR10) even when `--train_dir` or `--validation_dir` was provided. Now, when local dataset d\u2026",
+    "additions": 12,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix backward compatibility with remote code for old processors not defining valid_kwargs (e.g. phi4) Cc @zucchini-nlp Fix `test_processor_override` for phi3v and phi4 in vllm @hmellor",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44199",
-    "created_at": "2026-02-21T06:03:29Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44245",
+    "created_at": "2026-02-23T21:47:19Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44199/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44199",
+    "files_url": "https://github.com/huggingface/transformers/pull/44245/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44245",
     "labels": [],
     "merged": true,
-    "number": 44199,
-    "review_comments_count": 0,
+    "number": 44245,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Fix local dataset loading priority in run_image_classification_no_tra\u2026",
-    "updated_at": "2026-02-24T15:10:17Z"
+    "title": "Fix image processors `from_dict` backward compatibility with old remote code",
+    "updated_at": "2026-02-24T15:17:37Z"
   },
   {
-    "additions": 71,
-    "author": "danielalanbates",
+    "additions": 63,
+    "author": "thakoreh",
     "author_association": "NONE",
-    "body_excerpt": "Fixes #43975 ## Summary This PR fixes: `deepseek-ai/deepseek-coder-6.7b-instruct` incorrectly detokenizes in v5 ## Changes ``` src/transformers/tokenization_utils_tokenizers.py | 12 ++++- tests/models/llama/test_tokenization_llama.py | 60\u2026",
+    "body_excerpt": "## Summary Fixes #44242 Load balancing loss was not being added when `output_router_logits=False` in Mixtral models. ## Changes - Fixed loss calculation to include load balancing even when router logits are not output - Added test case ##\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44198",
-    "created_at": "2026-02-21T04:54:47Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44243",
+    "created_at": "2026-02-23T21:27:09Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44198/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44198",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44243/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44243",
+    "labels": [],
     "merged": false,
-    "number": 44198,
+    "number": 44243,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix #43975: `deepseek-ai/deepseek-coder-6.7b-instruct` incorrectly detok",
-    "updated_at": "2026-02-23T14:10:47Z"
+    "title": "fix: add load balancing loss when output_router_logits=False",
+    "updated_at": "2026-02-23T21:54:11Z"
   },
   {
-    "additions": 37,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #43937 ## Summary This PR fixes: [GLM-5] ValueError: GenerationConfig is invalid ## Changes ``` src/transformers/generation/configuration_utils.py | 13 +++++++++++- src/transformers/modeling_utils.py | 2 +- tests/generation/test_conf\u2026",
-    "changed_files": 3,
+    "additions": 9,
+    "author": "yushiran",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Adds missing `-> bool`, `-> int`, and `-> str | None` return type annotations to public utility functions in `utils/generic.py`, making them consistent with the newer `is_timm_config_dict` and `is_timm_local_checkpoint` function\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44197",
-    "created_at": "2026-02-21T04:47:32Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44241",
+    "created_at": "2026-02-23T19:50:05Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44197/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44197",
+    "files_url": "https://github.com/huggingface/transformers/pull/44241/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44241",
     "labels": [],
-    "merged": false,
-    "number": 44197,
+    "merged": true,
+    "number": 44241,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix #43937: [GLM-5] ValueError: GenerationConfig is invalid",
-    "updated_at": "2026-02-23T09:42:54Z"
+    "title": "fix: add missing return type annotations to type-checking utilities in generic.py",
+    "updated_at": "2026-02-24T13:27:11Z"
   },
   {
-    "additions": 12,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #43881 ## Summary This PR fixes: glm-4v-9b loading failed ## Changes ``` src/transformers/configuration_utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) ``` ## Testing Please review the changes carefully. T\u2026",
+    "additions": 2,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Makes sure `find_bad_commit` always return the result `dict`",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44196",
-    "created_at": "2026-02-21T04:41:02Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44240",
+    "created_at": "2026-02-23T19:12:49Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44196/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44196",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44196,
+    "files_url": "https://github.com/huggingface/transformers/pull/44240/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44240",
+    "labels": [],
+    "merged": true,
+    "number": 44240,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix #43881: glm-4v-9b loading failed",
-    "updated_at": "2026-02-23T09:45:03Z"
+    "title": "Fix return value - fixes #44238",
+    "updated_at": "2026-02-24T13:02:59Z"
   },
   {
-    "additions": 2,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44062 ## Summary This PR fixes: TypeError: tokenizers.AddedToken() got multiple values for keyword argument 'special' ## Changes ``` src/transformers/tokenization_utils_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-\u2026",
+    "additions": 253,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "part 2 of refactoring the training docs adds new dedicated guide to callbacks and data collators todo: - [x] backlink to `## Next steps` in `trainer.md` once https://github.com/huggingface/transformers/pull/44185 is merged",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44239",
+    "created_at": "2026-02-23T18:54:55Z",
+    "deletions": 47,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44239/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44239",
+    "labels": [],
+    "merged": true,
+    "number": 44239,
+    "review_comments_count": 10,
+    "state": "closed",
+    "title": "[docs] callbacks and collators",
+    "updated_at": "2026-02-24T22:12:46Z"
+  },
+  {
+    "additions": 1,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? small nit but will be misleading if not fixed",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44195",
-    "created_at": "2026-02-21T04:38:14Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44237",
+    "created_at": "2026-02-23T17:52:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44195/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44195",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44195,
+    "files_url": "https://github.com/huggingface/transformers/pull/44237/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44237",
+    "labels": [],
+    "merged": true,
+    "number": 44237,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix #44062: TypeError: tokenizers.AddedToken() got multiple values for k",
-    "updated_at": "2026-02-23T14:10:30Z"
+    "title": "[mimi] nit",
+    "updated_at": "2026-02-24T15:43:55Z"
   },
   {
-    "additions": 16,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44075 ## Summary This PR fixes: Optimizer SGD args are not used ## Changes ``` src/transformers/trainer_optimizer.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) ``` ## Testing Please review the changes carefully. The fix\u2026",
-    "changed_files": 1,
+    "additions": 109,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/43847 When using zero3 + from_config, the model was incorrectly initialized as we were not gathering the params. Added a test also. cc @tohtana",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44194",
-    "created_at": "2026-02-21T04:35:53Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44236",
+    "created_at": "2026-02-23T17:20:01Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44194/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44194",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44194,
+    "files_url": "https://github.com/huggingface/transformers/pull/44236/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44236",
+    "labels": [],
+    "merged": true,
+    "number": 44236,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix #44075: Optimizer SGD args are not used",
-    "updated_at": "2026-02-23T14:10:20Z"
+    "title": "fix zero3 init config",
+    "updated_at": "2026-02-27T11:36:19Z"
   },
   {
-    "additions": 2,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #43986 ## Summary This PR fixes: Confusing crash when loading a video model through AutoProcessor without torchvision installed ## Changes ``` src/transformers/models/auto/video_processing_auto.py | 2 ++ 1 file changed, 2 insertions(\u2026",
+    "additions": 1,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "UPDATE TO: https://github.com/huggingface/transformers/pull/44179/changes Models with incorrect tokenizer_class in tokenization_config.json that should use TokenziersBackend",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44193",
-    "created_at": "2026-02-21T04:34:37Z",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44235",
+    "created_at": "2026-02-23T17:06:54Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44193/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44193",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44193,
+    "files_url": "https://github.com/huggingface/transformers/pull/44235/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44235",
+    "labels": [],
+    "merged": true,
+    "number": 44235,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix #43986: Confusing crash when loading a video model through AutoProce",
-    "updated_at": "2026-02-23T09:46:15Z"
+    "title": "update fuyu tokenizer class",
+    "updated_at": "2026-02-23T17:36:22Z"
   },
   {
-    "additions": 3,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44079 ## Summary This PR fixes: `ModelOutput` keys aren't correctly assigned if key was previously None ## Changes ``` src/transformers/utils/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) ``` ## Testing Please r\u2026",
-    "changed_files": 1,
+    "additions": 249,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "Cc @zucchini-nlp",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44192",
-    "created_at": "2026-02-21T04:33:52Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44234",
+    "created_at": "2026-02-23T17:03:05Z",
+    "deletions": 55,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44192/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44192",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44192,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44234/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44234",
+    "labels": [],
+    "merged": true,
+    "number": 44234,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix #44079: `ModelOutput` keys aren't correctly assigned if key was prev",
-    "updated_at": "2026-02-23T14:10:14Z"
+    "title": "Add processing tests for phi4 multimodal",
+    "updated_at": "2026-02-23T22:08:11Z"
   },
   {
-    "additions": 95,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44155 ## Summary This PR fixes: [AudioFlamingo3] Batched inference produces incorrect results due to embedding/token leak between tracks ## Changes ``` .../audioflamingo3/modeling_audioflamingo3.py | 51 +++++++++++++++++++--- .../au\u2026",
-    "changed_files": 3,
+    "additions": 219,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "Extends `ty` coverage to `src/transformers/generation` - Added a dedicated type-check wrapper script: `utils/check_types.py`. - Updated `Makefile` to run `ty` checks through the wrapper in both `style` and `check-repo`. - merged all typing\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44191",
-    "created_at": "2026-02-21T04:32:30Z",
-    "deletions": 11,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44233",
+    "created_at": "2026-02-23T16:23:24Z",
+    "deletions": 101,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44191/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44191",
-    "labels": [
-      "Audio"
-    ],
-    "merged": false,
-    "number": 44191,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44233/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44233",
+    "labels": [],
+    "merged": true,
+    "number": 44233,
+    "review_comments_count": 33,
     "state": "closed",
-    "title": "Fix #44155: [AudioFlamingo3] Batched inference produces incorrect result",
-    "updated_at": "2026-03-19T16:16:17Z"
+    "title": "chore(typing): Add type checking to `src/transformers/generation`",
+    "updated_at": "2026-03-04T17:24:37Z"
   },
   {
-    "additions": 3,
-    "author": "excepshenal",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Under fp16_full_eval or bf16_full_eval, still don't move model to device if using another dist train backend. This is causing bugs with FSDP2 + bf16_full_eval. The dist train backend would still be in charge of movi\u2026",
-    "changed_files": 1,
+    "additions": 11,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? per https://code.claude.com/docs/en/claude-code-on-the-web#best-practices `CLAUDE.md` can alias directly into `AGENTS.md`",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44189",
-    "created_at": "2026-02-21T00:06:16Z",
-    "deletions": 1,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44232",
+    "created_at": "2026-02-23T16:10:15Z",
+    "deletions": 109,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44189/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44189",
+    "files_url": "https://github.com/huggingface/transformers/pull/44232/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44232",
     "labels": [],
-    "merged": false,
-    "number": 44189,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "fix: don't move model to device under other dist train backends",
-    "updated_at": "2026-02-21T00:06:16Z"
+    "merged": true,
+    "number": 44232,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "chore: added CLAUDE.md alias",
+    "updated_at": "2026-02-24T14:48:36Z"
   },
   {
-    "additions": 3,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issues were identified and fixed in this PR: \u2192 The NER/token classification issue and the downstream bug uncovered in the batched preprocessing use case with `LayoutLMv2Tokenizer`. \u2192 **Reasoning:** T\u2026",
-    "changed_files": 1,
+    "additions": 413,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44187",
-    "created_at": "2026-02-20T20:02:04Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44231",
+    "created_at": "2026-02-23T15:45:47Z",
+    "deletions": 578,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44187/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44187",
+    "files_url": "https://github.com/huggingface/transformers/pull/44231/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44231",
     "labels": [],
     "merged": true,
-    "number": 44187,
-    "review_comments_count": 0,
+    "number": 44231,
+    "review_comments_count": 18,
     "state": "closed",
-    "title": "fix(models): Fix LayoutLMv2 NER crash and broken batched truncation/padding",
-    "updated_at": "2026-02-23T10:30:51Z"
+    "title": "[Performance] FP8 Grouped and Batched Matmuls",
+    "updated_at": "2026-03-11T08:51:02Z"
   },
   {
-    "additions": 361,
-    "author": "stevhliu",
+    "additions": 4,
+    "author": "alvarobartt",
     "author_association": "MEMBER",
-    "body_excerpt": "part 1 of refactoring the `Trainer` docs - restructure the `toctree` a bit to accommodate new sections and docs - slim down `trainer.md` to be a clearer entry point (will expand the `## Next steps` section as we continue for better navigat\u2026",
-    "changed_files": 7,
+    "body_excerpt": "# What does this PR do? This PR adds the missing backtick (`) on the `AnyToAnyPipeline.__call__` docstrings, as those were showing as in the screenshot below instead. <img width=\"1023\" height=\"400\" alt=\"image\" src=\"https://github.com/user-\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44185",
-    "created_at": "2026-02-20T19:25:07Z",
-    "deletions": 578,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44229",
+    "created_at": "2026-02-23T15:25:47Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44185/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44185",
+    "files_url": "https://github.com/huggingface/transformers/pull/44229/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44229",
     "labels": [],
     "merged": true,
-    "number": 44185,
-    "review_comments_count": 19,
+    "number": 44229,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] trainer part 1",
-    "updated_at": "2026-02-24T21:18:42Z"
+    "title": "Add missing backtick in `AnyToAnyPipeline.__call__` docstring",
+    "updated_at": "2026-02-23T19:21:08Z"
   },
   {
-    "additions": 191,
-    "author": "mariam851",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR implements the initial architecture for CircuitGPT (based on OpenAI's research), as discussed in #44121. Key implementations: SparseLinear: Custom layer with Top-K weight sparsity logic. CircuitGpt Components: Attention, MLP, and C\u2026",
-    "changed_files": 3,
+    "additions": 35,
+    "author": "JonoLF",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44184",
-    "created_at": "2026-02-20T16:58:27Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44228",
+    "created_at": "2026-02-23T15:09:05Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44184/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44184",
+    "files_url": "https://github.com/huggingface/transformers/pull/44228/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44228",
     "labels": [],
     "merged": false,
-    "number": 44184,
+    "number": 44228,
     "review_comments_count": 0,
     "state": "open",
-    "title": "feat: add OpenAI CircuitGPT core architecture and sparse linear layers",
-    "updated_at": "2026-02-20T17:18:44Z"
+    "title": "[Quantisation] account for nested tensors from quantisers",
+    "updated_at": "2026-03-17T11:57:53Z"
   },
   {
-    "additions": 1,
-    "author": "Rocketknight1",
+    "additions": 21,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "Our code has some references to the `grouped_entities` arg to the token classification pipeline, but this is no longer usable. This PR cleans them up entirely! Fixes #44016",
-    "changed_files": 2,
+    "body_excerpt": "This PR adds a logging message when infering the behavior of use async and fixes an error when evicting a graph from the graph buffer.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44182",
-    "created_at": "2026-02-20T15:28:26Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44227",
+    "created_at": "2026-02-23T14:53:53Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44182/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44182",
+    "files_url": "https://github.com/huggingface/transformers/pull/44227/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44227",
     "labels": [],
     "merged": true,
-    "number": 44182,
-    "review_comments_count": 0,
+    "number": 44227,
+    "review_comments_count": 9,
     "state": "closed",
-    "title": "Remove refs to grouped_entities",
-    "updated_at": "2026-02-24T16:07:24Z"
+    "title": "[CB] Small fixes",
+    "updated_at": "2026-03-03T13:40:10Z"
   },
   {
-    "additions": 898,
+    "additions": 86,
     "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title! Follow up of https://github.com/huggingface/transformers/pull/44130 and https://github.com/huggingface/transformers/pull/44226. Finally remove the `cache_position` everywhere (not ALL models, but a\u2026",
-    "changed_files": 169,
+    "body_excerpt": "# What does this PR do? As per the title. It looks like some models (xlnet and kosmos2_5) and most audio models sometimes rely on the full previous input_ids to prepare inputs. Note that this cannot be compatible with restarting generation\u2026",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44181",
-    "created_at": "2026-02-20T15:24:39Z",
-    "deletions": 2698,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44226",
+    "created_at": "2026-02-23T13:27:23Z",
+    "deletions": 66,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44181/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44181",
+    "files_url": "https://github.com/huggingface/transformers/pull/44226/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44226",
     "labels": [],
     "merged": true,
-    "number": 44181,
-    "review_comments_count": 32,
+    "number": 44226,
+    "review_comments_count": 17,
     "state": "closed",
-    "title": "[core] \ud83d\udea8 Completely remove cache positions",
-    "updated_at": "2026-03-04T18:08:42Z"
+    "title": "[generate] Always pass full input_ids in `prepare_inputs_for_generation`",
+    "updated_at": "2026-02-24T10:45:49Z"
   },
   {
-    "additions": 28,
-    "author": "tarekziade",
+    "additions": 169,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, we weren't running these test for some time because they were being filtered into `non-model` tests. But `non-model` doesn't run tests that are marked as generation, so I moved it back to `generation`\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44225",
+    "created_at": "2026-02-23T12:09:40Z",
+    "deletions": 270,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44225/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44225",
+    "labels": [],
+    "merged": true,
+    "number": 44225,
+    "review_comments_count": 18,
+    "state": "closed",
+    "title": "Fix generation integration tests",
+    "updated_at": "2026-02-25T09:19:39Z"
+  },
+  {
+    "additions": 5,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes a flaky test in IdeficsForVisionText2TextTest::test_generate_continue_from_inputs_embeds. The flakiness can be reproduced with: ``` pytest -q -p no:rerunfailures --flake-finder --flake-runs=20 \\ tests/models/i\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Small fixes after https://github.com/huggingface/transformers/pull/44130. See https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/8785954cca2fdca181de0b9567059471bcadb959/2026-02-21/ci_resu\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44180",
-    "created_at": "2026-02-20T14:30:46Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44224",
+    "created_at": "2026-02-23T10:48:19Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44180/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44180",
+    "files_url": "https://github.com/huggingface/transformers/pull/44224/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44224",
     "labels": [],
-    "merged": true,
-    "number": 44180,
+    "merged": false,
+    "number": 44224,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(flaky): idefics generate cache flake",
-    "updated_at": "2026-02-26T16:18:18Z"
+    "title": "Small fixes",
+    "updated_at": "2026-02-24T10:06:14Z"
   },
   {
-    "additions": 27,
-    "author": "itazap",
+    "additions": 1,
+    "author": "albertvillanova",
     "author_association": "MEMBER",
-    "body_excerpt": "Models with incorrect tokenizer_class in tokenization_config.json that should use TokenziersBackend",
+    "body_excerpt": "Fix type of `TrainingArguments.logging_steps`. This PR makes a minor update to the `TrainingArguments` class, so `logging_steps` parameter accepts both integers and floats, rather than only floats. Note these are the expected types in the\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44179",
-    "created_at": "2026-02-20T13:51:44Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44223",
+    "created_at": "2026-02-23T08:50:04Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44179/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44179",
+    "files_url": "https://github.com/huggingface/transformers/pull/44223/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44223",
     "labels": [],
-    "merged": true,
-    "number": 44179,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 44223,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Models with incorrect tokenizer_class in tokenization_config.json tha\u2026",
-    "updated_at": "2026-02-23T08:33:13Z"
+    "title": "Fix type of TrainingArguments.logging_steps",
+    "updated_at": "2026-02-23T09:08:18Z"
   },
   {
-    "additions": 2940,
-    "author": "ebezzam",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Re-opening https://github.com/huggingface/transformers/pull/37868 TODO - [x] recompute expected outputs - [x] passthrough code given new conventions - [x] check for unused code paths / configuration parameters Origi\u2026",
-    "changed_files": 27,
+    "additions": 1,
+    "author": "matisgagneux21",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - fix a typo in the Italian add-new-model guide: `docstirng` -> `docstring`. ## Why Small docs quality fix that avoids confusion for readers following the contribution guide.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44178",
-    "created_at": "2026-02-20T12:36:21Z",
-    "deletions": 48,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44219",
+    "created_at": "2026-02-23T00:43:59Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44178/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44178",
+    "files_url": "https://github.com/huggingface/transformers/pull/44219/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44219",
     "labels": [],
-    "merged": false,
-    "number": 44178,
-    "review_comments_count": 8,
-    "state": "open",
-    "title": "Add xcodec2 model",
-    "updated_at": "2026-04-13T08:07:13Z"
+    "merged": true,
+    "number": 44219,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Docs(it): fix typo in docstring wording",
+    "updated_at": "2026-02-23T15:04:51Z"
   },
   {
-    "additions": 41,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, spiritual successor to #44081 Why? Because as is - Only defaults for fa2/fa3, not on other requested kernels - Limits implementations to one kernel/implementation while I suspect that there will be multiple viable versions (i\u2026",
-    "changed_files": 7,
+    "additions": 1,
+    "author": "matisgagneux21",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - fix a typo in the Italian migration guide install command: - `stentencepiece` -> `sentencepiece` ## Why The current command fails if copied as-is. This makes the installation snippet runnable for users reading the Italian docs.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44177",
-    "created_at": "2026-02-20T12:13:30Z",
-    "deletions": 71,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44218",
+    "created_at": "2026-02-23T00:32:49Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44177/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44177",
+    "files_url": "https://github.com/huggingface/transformers/pull/44218/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44218",
     "labels": [],
     "merged": true,
-    "number": 44177,
-    "review_comments_count": 2,
+    "number": 44218,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Flash Attn`] Enable compatible implementations",
-    "updated_at": "2026-02-20T12:43:35Z"
+    "title": "Docs(it): fix typo in sentencepiece install command",
+    "updated_at": "2026-02-23T15:05:17Z"
   },
   {
-    "additions": 271,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "Our kernel loading is incompatible with the original packages as they do not expose the same import structure: - Kernels seem to expose things in the init (and not in the original path) - Original packages seem to expose only within their\u2026",
-    "changed_files": 14,
+    "additions": 1,
+    "author": "matisgagneux21",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - Update the Italian migration guide to avoid pointing users to the deprecated `grouped_entities` flag. - Clarify that `aggregation_strategy` is the current option (with a note that it was previously `grouped_entities`). ## Why\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44176",
-    "created_at": "2026-02-20T11:36:01Z",
-    "deletions": 124,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44217",
+    "created_at": "2026-02-23T00:10:48Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44176/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44176",
+    "files_url": "https://github.com/huggingface/transformers/pull/44217/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44217",
     "labels": [],
-    "merged": true,
-    "number": 44176,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 44217,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Mamba`] Fix kernel loading",
-    "updated_at": "2026-02-20T16:19:06Z"
+    "title": "Docs(it): update deprecated grouped_entities reference",
+    "updated_at": "2026-02-23T14:56:19Z"
   },
   {
-    "additions": 1,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "add jamba tokenizer mapping to PreTrainedTokenizerFast (v4/v5 BC) for vllm: https://buildkite.com/vllm/ci/builds/52260/steps/canvas?sid=019c76ad-c8f2-4e59-a2f4-5f3b5bbc204c&tab=output",
+    "additions": 13,
+    "author": "nikste",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Automated fix for #44016: Syntax error in Transformer section 3 (Transformers, what can they do?) notebook Fixes #44016 ## What does this PR do? This PR addresses issue #44016 by implementing the fix described in the issue. ---\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44175",
-    "created_at": "2026-02-20T11:00:18Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44216",
+    "created_at": "2026-02-22T23:40:56Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44175/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44175",
+    "files_url": "https://github.com/huggingface/transformers/pull/44216/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44216",
     "labels": [],
     "merged": false,
-    "number": 44175,
+    "number": 44216,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add jamba tokenizer mapping to PreTrainedTokenizerFast (v4/v5 BC)",
-    "updated_at": "2026-02-20T16:19:31Z"
+    "title": "Fix #44016: Syntax error in Transformer section 3 (Transformers, what can they do?) notebook",
+    "updated_at": "2026-02-22T23:50:57Z"
   },
   {
-    "additions": 1367,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "This draft expands `utils/check_modeling_structure.py` into a rule-driven linter for model code, with new checks and tests, while keeping runtime very low. Key features: - The checker is intentionally AST-only (no heavy imports/execution),\u2026",
-    "changed_files": 4,
+    "additions": 187,
+    "author": "jmriosal",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? Add sequence classification capabilities to the family of Granite models (Granite, GraniteMoe, GraniteMoeHybrid, and GraniteMoeShared). Fixes #44214, #35720 ## Why The Granite models currently only have the base mod\u2026",
+    "changed_files": 17,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44174",
-    "created_at": "2026-02-20T10:38:11Z",
-    "deletions": 24,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44215",
+    "created_at": "2026-02-22T23:24:43Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44174/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44174",
+    "files_url": "https://github.com/huggingface/transformers/pull/44215/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44215",
     "labels": [],
-    "merged": true,
-    "number": 44174,
-    "review_comments_count": 38,
-    "state": "closed",
-    "title": "Expand model-structure lint rules with a fast AST-based, ruff-like framework",
-    "updated_at": "2026-03-12T06:42:21Z"
+    "merged": false,
+    "number": 44215,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add sequence classification capability to Granite models",
+    "updated_at": "2026-02-24T20:39:37Z"
   },
   {
-    "additions": 20,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes flaky GLM OCR generation behavior when 2D `position_ids` are passed explicitly. Reproducible locally with: ``` pytest tests/models/glm_ocr/test_modeling_glm_ocr.py::GlmOcrModelTest::test_generate_with_and_without_position_ids --flake\u2026",
+    "additions": 70,
+    "author": "parthchopra07",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? This PR refreshes the BEiT model documentation to align it with the current Transformers vision docs style and features. It updates the usage examples, clarifies configuration details, and improves the resources sec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 14,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44173",
-    "created_at": "2026-02-20T09:28:48Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44213",
+    "created_at": "2026-02-22T18:32:16Z",
+    "deletions": 29,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44173/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44173",
+    "files_url": "https://github.com/huggingface/transformers/pull/44213/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44213",
     "labels": [],
-    "merged": true,
-    "number": 44173,
-    "review_comments_count": 10,
+    "merged": false,
+    "number": 44213,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(flaky): `test_generate_with_and_without_position_ids` in GLM ORC",
-    "updated_at": "2026-02-20T19:06:19Z"
+    "title": "Update BEiT model card",
+    "updated_at": "2026-02-28T14:33:57Z"
   },
   {
-    "additions": 2,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary I've noticed `test_synthidtext_watermark_processor_distributional_convergence_*` was our slowest tests in CircleCI This PR speeds up the slowest SynthID distributional convergence tests by optimizing a hot path in SynthIDTextWat\u2026",
+    "additions": 1,
+    "author": "alexandercarruthers",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Updates a broken link in the summarization guide. https://huggingface.co/docs/transformers/tasks/summarization https://huggingface.co/billsum/datasets results in a 404. New URL is https://huggingface.co/datasets/Fis\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44172",
-    "created_at": "2026-02-20T08:59:30Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44212",
+    "created_at": "2026-02-22T18:02:43Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44172/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44172",
+    "files_url": "https://github.com/huggingface/transformers/pull/44212/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44212",
     "labels": [],
     "merged": true,
-    "number": 44172,
+    "number": 44212,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "perf: Optimize SynthID logits processor batch index construction",
-    "updated_at": "2026-02-27T09:32:43Z"
+    "title": "Update 404ing BillSum dataset URL on Summarization Task guide",
+    "updated_at": "2026-02-23T14:46:11Z"
   },
   {
-    "additions": 2449,
-    "author": "lmaksym",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds TDT decoder support for Parakeet ASR models, extending the existing CTC-only implementation. It incorporates the initial TDT integration work from [#41545](https://github.com/huggingface/transformers/pu\u2026",
-    "changed_files": 28,
+    "additions": 10,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## Fix for #44016 The `grouped_entities` parameter in `TokenClassificationPipeline._sanitize_parameters` was removed without a deprecation period, causing a `TypeError` when users pass `grouped_entities=True` to the `pipeline()` call (as s\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 15,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44171",
-    "created_at": "2026-02-20T08:44:46Z",
-    "deletions": 294,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44211",
+    "created_at": "2026-02-22T17:04:50Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44171/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44171",
+    "files_url": "https://github.com/huggingface/transformers/pull/44211/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44211",
     "labels": [
-      "New model",
-      "Audio"
+      "Code agent slop"
     ],
     "merged": false,
-    "number": 44171,
-    "review_comments_count": 66,
-    "state": "open",
-    "title": "Parakeet tdt",
-    "updated_at": "2026-03-26T18:00:35Z"
+    "number": 44211,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add backward compatibility for deprecated grouped_entities parameter",
+    "updated_at": "2026-02-23T16:26:02Z"
   },
   {
-    "additions": 74,
-    "author": "veeceey",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Adds `GitForCausalLM` to `MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES` so GIT can be used with the `visual-question-answering` pipeline - Filters tokenizer outputs in VQA pipeline `preprocess` to only pass keys accepted\u2026",
-    "changed_files": 4,
+    "additions": 1,
+    "author": "nightcityblade",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #44206 ## Problem PR #43769 (\"Add Voxtral Realtime\") added a `center` parameter to `LasrFeatureExtractor.__call__()` and passed it to `_torch_extract_fbank_features()`, but that method does not accept it. This causes a `TypeError` on\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44170",
-    "created_at": "2026-02-20T08:28:05Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44210",
+    "created_at": "2026-02-22T16:06:16Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44170/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44170",
+    "files_url": "https://github.com/huggingface/transformers/pull/44210/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44210",
     "labels": [],
     "merged": false,
-    "number": 44170,
+    "number": 44210,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add GIT model support in VQA pipeline",
-    "updated_at": "2026-02-20T09:34:31Z"
+    "title": "fix(lasr): remove spurious center arg from _torch_extract_fbank_features call",
+    "updated_at": "2026-02-23T14:32:41Z"
   },
   {
-    "additions": 415,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "Initial ty integration. To avoid a gigantic, risky patch, let's start with a baby step where we add the tooling to make repo-check and activate it on a subset of the repo. That gives us a human-readable patch, and allows us to get conforta\u2026",
-    "changed_files": 25,
+    "additions": 197,
+    "author": "paipeline",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? This PR fixes a critical bug in LayoutLMv2Tokenizer where passing `word_labels` for NER token classification tasks would crash with `AttributeError`. The issue was that `word_ids` and `sequence_ids` were being acce\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44167",
-    "created_at": "2026-02-20T07:39:44Z",
-    "deletions": 210,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44209",
+    "created_at": "2026-02-22T14:37:25Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44167/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44167",
+    "files_url": "https://github.com/huggingface/transformers/pull/44209/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44209",
     "labels": [],
-    "merged": true,
-    "number": 44167,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 44209,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "chore(typing): initial ty integration",
-    "updated_at": "2026-02-20T19:08:09Z"
+    "title": "Fix LayoutLMv2Tokenizer NER crashes with word_labels",
+    "updated_at": "2026-02-23T10:30:26Z"
   },
   {
-    "additions": 73,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "The CI does not output useful info on this flaky test - `tests.models.olmo.test_modeling_olmo.OlmoModelTest testMethod=test_generate_with_static_cache` and makes it harder to determine the root problem when not reproducible locally. This p\u2026",
-    "changed_files": 8,
+    "additions": 1,
+    "author": "ainergiz",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR fixes a LASR regression introduced in `#43769` (released in `v5.2.0`). `LasrFeatureExtractor.__call__` passes `center` into `_torch_extract_fbank_features(...)`, but `_torch_extract_fbank_features` did not a\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44166",
-    "created_at": "2026-02-20T07:20:15Z",
-    "deletions": 61,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44207",
+    "created_at": "2026-02-21T20:56:49Z",
+    "deletions": 70,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44166/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44166",
+    "files_url": "https://github.com/huggingface/transformers/pull/44207/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44207",
     "labels": [],
     "merged": true,
-    "number": 44166,
-    "review_comments_count": 4,
+    "number": 44207,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Improve `has_similar_generate_outputs` assertions",
-    "updated_at": "2026-02-27T08:26:13Z"
+    "title": "Fix LASR feature extractor regression from invalid center argument",
+    "updated_at": "2026-02-23T10:01:35Z"
   },
   {
-    "additions": 29,
-    "author": "alexmalyshev",
+    "additions": 1,
+    "author": "nightcityblade",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "With Python 3.14 and PEP 649, you can no longer expect `cls.__dict__[\"__annotations__\"]` to contain annotations for the exact class, it will be loaded lazily and can make it seem like the class doesn't have any annotations. The recommended\u2026",
+    "body_excerpt": "## What does this PR do? Fixes #44186 `LayoutLMv2Tokenizer.__init__` passes `only_label_first_subword` to `super().__init__()` but never stores it as `self.only_label_first_subword`. This causes an `AttributeError` when `word_labels` is pa\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44165",
-    "created_at": "2026-02-20T04:47:07Z",
-    "deletions": 14,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44204",
+    "created_at": "2026-02-21T16:06:46Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44165/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44165",
+    "files_url": "https://github.com/huggingface/transformers/pull/44204/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44204",
     "labels": [],
     "merged": false,
-    "number": 44165,
+    "number": 44204,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix how PreTrainedModel checks annotations on Python 3.14+",
-    "updated_at": "2026-03-20T17:31:11Z"
+    "title": "fix(layoutlmv2): store only_label_first_subword attribute in tokenizer",
+    "updated_at": "2026-02-23T10:30:19Z"
   },
   {
-    "additions": 7,
-    "author": "lhallee",
+    "additions": 22,
+    "author": "nightcityblade",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes: https://github.com/huggingface/transformers/issues/44162 @ArthurZucker @Cyrilvallez",
-    "changed_files": 2,
+    "body_excerpt": "## Summary Fixes #44075 `_get_sgd()`, `_get_adagrad()`, and `_get_rmsprop()` in `trainer_optimizer.py` only returned `ctx.optimizer_kwargs` (which contains just `lr`), completely ignoring `ctx.optim_args`. This meant that parameters specif\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44163",
-    "created_at": "2026-02-19T21:44:25Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44203",
+    "created_at": "2026-02-21T15:12:17Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44163/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44163",
+    "files_url": "https://github.com/huggingface/transformers/pull/44203/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44203",
     "labels": [],
     "merged": true,
-    "number": 44163,
+    "number": 44203,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "ESM2 attention_mask and token_dropout fix",
-    "updated_at": "2026-02-20T15:17:31Z"
+    "title": "fix(trainer): pass optim_args to SGD, Adagrad, and RMSprop optimizers",
+    "updated_at": "2026-02-25T16:04:20Z"
   },
   {
-    "additions": 379,
-    "author": "cogniera",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "What does this PR do? This PR refactors the LongT5 model to use the @capture_outputs and @can_return_tuple decorators for standardized output handling across the model stack. The refactor removes manual handling of: output_attentions outpu\u2026",
-    "changed_files": 1,
+    "additions": 63,
+    "author": "GS-GOAT",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? When `torch.compile` is used, [_ignore_bidirectional_mask_sdpa](cci:1://file:///c:/Users/BIT/Desktop/proj/gitrepo_clones/transformers/src/transformers/masking_utils.py:303:0-338:16) behaves differently than in eager\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44161",
-    "created_at": "2026-02-19T20:46:49Z",
-    "deletions": 170,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44202",
+    "created_at": "2026-02-21T13:45:15Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44161/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44161",
+    "files_url": "https://github.com/huggingface/transformers/pull/44202/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44202",
     "labels": [],
     "merged": false,
-    "number": 44161,
+    "number": 44202,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor LongT5 to use @capture_outputs and @can_return_tuple decorators for unified output handling (Fixes #43979)",
-    "updated_at": "2026-02-20T17:28:12Z"
+    "state": "closed",
+    "title": "Fix: bidirectional mask skip when attention dropout is active (#44188)",
+    "updated_at": "2026-03-09T10:31:41Z"
   },
   {
-    "additions": 2104,
-    "author": "molbap",
+    "additions": 18,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Hey yall, I started porting the pi0 model so Transformers can be a backend for VLAs directly. I checked it against LeRobot on fix/lerobot_openpi: outputs seem to match and for sure lerobot/pi0_base loads cleanly (no\u2026",
-    "changed_files": 22,
+    "body_excerpt": "# What does this PR do? main is currently failing with ``` FAILED tests/models/higgs_audio_v2/test_modeling_higgs_audio_v2.py::HiggsAudioV2ModelTest::test_generate_compilation_all_outputs - AssertionError: Lists differ: [torch.Size([2, 15,\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44160",
-    "created_at": "2026-02-19T17:16:29Z",
-    "deletions": 2,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44201",
+    "created_at": "2026-02-21T10:03:41Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44160/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44160",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44201/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44201",
+    "labels": [],
     "merged": true,
-    "number": 44160,
-    "review_comments_count": 32,
+    "number": 44201,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Add model lerobot PI0 to transformers",
-    "updated_at": "2026-03-16T10:23:14Z"
+    "title": "fix: HiggsAudioV2 cached decode inputs in compiled generation",
+    "updated_at": "2026-02-23T12:39:19Z"
   },
   {
-    "additions": 67,
-    "author": "samuelleecong",
+    "additions": 3,
+    "author": "pragnyanramtha",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Closes #28103 - Refactor `OwlViTAttention` to use `ALL_ATTENTION_FUNCTIONS` for dynamic attention backend dispatch (same pattern as CLIP) - Add `eager_attention_forward` standalone function with the standardized interface - Resh\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Fixes #43782 The `weights_only` parameter passed to `from_pretrained()` was not being forwarded to `load_state_dict()` when loading `.bin` checkpoint files in the non-DeepSpeed code path. This caused `weights_only` to always default to `Tr\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44159",
-    "created_at": "2026-02-19T16:31:44Z",
-    "deletions": 61,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44159/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44159",
-    "labels": [],
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44200",
+    "created_at": "2026-02-21T06:24:17Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44200/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44200",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44159,
+    "number": 44200,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add SDPA and Flash Attention support for OWL-ViT",
-    "updated_at": "2026-02-24T12:53:10Z"
+    "state": "closed",
+    "title": "fix: propagate `weights_only` param to `load_state_dict` in .bin loading path (#43782)",
+    "updated_at": "2026-02-23T14:20:12Z"
   },
   {
-    "additions": 141,
-    "author": "leopold-tzafon",
+    "additions": 3,
+    "author": "gowthamr-tech",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# Fix issue where `use_cache=False`, corrupts model Qwen3vl output. Tested with: ``` import torch from transformers import Qwen3VLForConditionalGeneration, AutoProcessor MODEL_NAME = \"Qwen/Qwen3-VL-4B-Instruct\" DEVICE = \"cuda\" DTYPE = torc\u2026",
-    "changed_files": 9,
+    "body_excerpt": "## What does this PR do? This PR fixes an issue in `run_image_classification_no_trainer.py` where the script always loaded `dataset_name` (e.g., CIFAR10) even when `--train_dir` or `--validation_dir` was provided. Now, when local dataset d\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 19,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44158",
-    "created_at": "2026-02-19T15:45:13Z",
-    "deletions": 36,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44199",
+    "created_at": "2026-02-21T06:03:29Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44158/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44158",
+    "files_url": "https://github.com/huggingface/transformers/pull/44199/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44199",
     "labels": [],
     "merged": true,
-    "number": 44158,
+    "number": 44199,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix bug with position_ids on qwen3-vl models, such that position_ids include text position",
-    "updated_at": "2026-02-23T14:53:33Z"
+    "title": "Fix local dataset loading priority in run_image_classification_no_tra\u2026",
+    "updated_at": "2026-02-24T15:10:17Z"
   },
   {
-    "additions": 689,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, gets rid of `if/else` per attn implementation",
-    "changed_files": 24,
+    "additions": 71,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43975 ## Summary This PR fixes: `deepseek-ai/deepseek-coder-6.7b-instruct` incorrectly detokenizes in v5 ## Changes ``` src/transformers/tokenization_utils_tokenizers.py | 12 ++++- tests/models/llama/test_tokenization_llama.py | 60\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44157",
-    "created_at": "2026-02-19T14:49:49Z",
-    "deletions": 834,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44198",
+    "created_at": "2026-02-21T04:54:47Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44157/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44157",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44198/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44198",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44157,
-    "review_comments_count": 10,
-    "state": "open",
-    "title": "Use correct mask for packed inputs in Qwen-VL ",
-    "updated_at": "2026-02-24T13:13:43Z"
+    "number": 44198,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix #43975: `deepseek-ai/deepseek-coder-6.7b-instruct` incorrectly detok",
+    "updated_at": "2026-02-23T14:10:47Z"
   },
   {
-    "additions": 2,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes a minor error when using aqml quantization. We specified the wrong argument.",
-    "changed_files": 2,
+    "additions": 37,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43937 ## Summary This PR fixes: [GLM-5] ValueError: GenerationConfig is invalid ## Changes ``` src/transformers/generation/configuration_utils.py | 13 +++++++++++- src/transformers/modeling_utils.py | 2 +- tests/generation/test_conf\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44156",
-    "created_at": "2026-02-19T14:35:38Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44197",
+    "created_at": "2026-02-21T04:47:32Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44156/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44156",
+    "files_url": "https://github.com/huggingface/transformers/pull/44197/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44197",
     "labels": [],
     "merged": false,
-    "number": 44156,
+    "number": 44197,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix aqml `modules_to_not_convert`",
-    "updated_at": "2026-03-27T16:50:02Z"
+    "title": "Fix #43937: [GLM-5] ValueError: GenerationConfig is invalid",
+    "updated_at": "2026-02-23T09:42:54Z"
   },
   {
-    "additions": 44,
-    "author": "Aatman09",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 12,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43881 ## Summary This PR fixes: glm-4v-9b loading failed ## Changes ``` src/transformers/configuration_utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) ``` ## Testing Please review the changes carefully. T\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44154",
-    "created_at": "2026-02-19T12:17:56Z",
-    "deletions": 52,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44196",
+    "created_at": "2026-02-21T04:41:02Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44154/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44154",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44196/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44196",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44154,
+    "number": 44196,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactored vits to match standardized output collection interface",
-    "updated_at": "2026-02-19T12:18:56Z"
+    "state": "closed",
+    "title": "Fix #43881: glm-4v-9b loading failed",
+    "updated_at": "2026-02-23T09:45:03Z"
   },
   {
-    "additions": 79,
-    "author": "IlyasMoutawwakil",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 3,
+    "additions": 2,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44062 ## Summary This PR fixes: TypeError: tokenizers.AddedToken() got multiple values for keyword argument 'special' ## Changes ``` src/transformers/tokenization_utils_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44152",
-    "created_at": "2026-02-19T09:37:51Z",
-    "deletions": 45,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44195",
+    "created_at": "2026-02-21T04:38:14Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44152/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44152",
-    "labels": [],
-    "merged": true,
-    "number": 44152,
-    "review_comments_count": 2,
+    "files_url": "https://github.com/huggingface/transformers/pull/44195/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44195",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44195,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "AutoGrad support for grouped_mm fallback",
-    "updated_at": "2026-02-20T11:15:23Z"
+    "title": "Fix #44062: TypeError: tokenizers.AddedToken() got multiple values for k",
+    "updated_at": "2026-02-23T14:10:30Z"
   },
   {
-    "additions": 58,
-    "author": "ManasVardhan",
+    "additions": 16,
+    "author": "danielalanbates",
     "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the BioGPT m\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Fixes #44075 ## Summary This PR fixes: Optimizer SGD args are not used ## Changes ``` src/transformers/trainer_optimizer.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) ``` ## Testing Please review the changes carefully. The fix\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44151",
-    "created_at": "2026-02-19T06:55:43Z",
-    "deletions": 134,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44194",
+    "created_at": "2026-02-21T04:35:53Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44151/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44151",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44194/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44194",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44151,
+    "number": 44194,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor BioGPT output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:01Z"
+    "title": "Fix #44075: Optimizer SGD args are not used",
+    "updated_at": "2026-02-23T14:10:20Z"
   },
   {
-    "additions": 22,
-    "author": "ManasVardhan",
+    "additions": 2,
+    "author": "danielalanbates",
     "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the MPT mode\u2026",
+    "body_excerpt": "Fixes #43986 ## Summary This PR fixes: Confusing crash when loading a video model through AutoProcessor without torchvision installed ## Changes ``` src/transformers/models/auto/video_processing_auto.py | 2 ++ 1 file changed, 2 insertions(\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44150",
-    "created_at": "2026-02-19T06:54:09Z",
-    "deletions": 73,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44193",
+    "created_at": "2026-02-21T04:34:37Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44150/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44150",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44193/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44193",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44150,
+    "number": 44193,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor MPT output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:02Z"
+    "title": "Fix #43986: Confusing crash when loading a video model through AutoProce",
+    "updated_at": "2026-02-23T09:46:15Z"
   },
   {
-    "additions": 85,
-    "author": "ManasVardhan",
+    "additions": 3,
+    "author": "danielalanbates",
     "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CPMAnt m\u2026",
-    "changed_files": 4,
+    "body_excerpt": "Fixes #44079 ## Summary This PR fixes: `ModelOutput` keys aren't correctly assigned if key was previously None ## Changes ``` src/transformers/utils/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) ``` ## Testing Please r\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44149",
-    "created_at": "2026-02-19T06:51:06Z",
-    "deletions": 201,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44192",
+    "created_at": "2026-02-21T04:33:52Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44149/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44149",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44192/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44192",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44149,
+    "number": 44192,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor CPMAnt output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:03Z"
+    "title": "Fix #44079: `ModelOutput` keys aren't correctly assigned if key was prev",
+    "updated_at": "2026-02-23T14:10:14Z"
   },
   {
-    "additions": 33,
-    "author": "ManasVardhan",
+    "additions": 95,
+    "author": "danielalanbates",
     "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the Bros mod\u2026",
-    "changed_files": 4,
+    "body_excerpt": "Fixes #44155 ## Summary This PR fixes: [AudioFlamingo3] Batched inference produces incorrect results due to embedding/token leak between tracks ## Changes ``` .../audioflamingo3/modeling_audioflamingo3.py | 51 +++++++++++++++++++--- .../au\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44148",
-    "created_at": "2026-02-19T06:46:24Z",
-    "deletions": 124,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44191",
+    "created_at": "2026-02-21T04:32:30Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44148/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44148",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44191/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44191",
+    "labels": [
+      "Audio"
+    ],
     "merged": false,
-    "number": 44148,
+    "number": 44191,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor Bros output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:05Z"
+    "title": "Fix #44155: [AudioFlamingo3] Batched inference produces incorrect result",
+    "updated_at": "2026-03-19T16:16:17Z"
   },
   {
-    "additions": 11,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CTRL mod\u2026",
+    "additions": 3,
+    "author": "excepshenal",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Under fp16_full_eval or bf16_full_eval, still don't move model to device if using another dist train backend. This is causing bugs with FSDP2 + bf16_full_eval. The dist train backend would still be in charge of movi\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44147",
-    "created_at": "2026-02-19T06:45:32Z",
-    "deletions": 47,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44189",
+    "created_at": "2026-02-21T00:06:16Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44147/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44147",
+    "files_url": "https://github.com/huggingface/transformers/pull/44189/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44189",
     "labels": [],
     "merged": false,
-    "number": 44147,
+    "number": 44189,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor CTRL output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:06Z"
+    "state": "open",
+    "title": "fix: don't move model to device under other dist train backends",
+    "updated_at": "2026-02-21T00:06:16Z"
   },
   {
-    "additions": 38,
-    "author": "khushali9",
+    "additions": 3,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? When using a step-based evaluation strategy (IntervalStrategy.STEPS), the trainer may skip evaluation at the final step if the last step does not align with eval_steps. This avoids missing the final evaluation while\u2026",
-    "changed_files": 4,
+    "body_excerpt": "### What does this PR do? The following issues were identified and fixed in this PR: \u2192 The NER/token classification issue and the downstream bug uncovered in the batched preprocessing use case with `LayoutLMv2Tokenizer`. \u2192 **Reasoning:** T\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 18,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44146",
-    "created_at": "2026-02-19T05:29:21Z",
-    "deletions": 11,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44187",
+    "created_at": "2026-02-20T20:02:04Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44146/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44146",
+    "files_url": "https://github.com/huggingface/transformers/pull/44187/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44187",
     "labels": [],
     "merged": true,
-    "number": 44146,
-    "review_comments_count": 14,
+    "number": 44187,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Ensure final evaluation runs with step-based evaluation strategy",
-    "updated_at": "2026-03-26T16:30:40Z"
+    "title": "fix(models): Fix LayoutLMv2 NER crash and broken batched truncation/padding",
+    "updated_at": "2026-02-23T10:30:51Z"
   },
   {
-    "additions": 400,
-    "author": "balvisio",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds support for sequence packing in the ESM2 model. Currently, the RotaryEmbedding class of the ESM2 model supports BSHD format. This PR makes the RotayEmbedding class aware of the`position_ids` and builds\u2026",
-    "changed_files": 8,
+    "additions": 361,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "part 1 of refactoring the `Trainer` docs - restructure the `toctree` a bit to accommodate new sections and docs - slim down `trainer.md` to be a clearer entry point (will expand the `## Next steps` section as we continue for better navigat\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 27,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44145",
-    "created_at": "2026-02-19T02:58:50Z",
-    "deletions": 216,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44185",
+    "created_at": "2026-02-20T19:25:07Z",
+    "deletions": 578,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44145/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44145",
+    "files_url": "https://github.com/huggingface/transformers/pull/44185/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44185",
     "labels": [],
     "merged": true,
-    "number": 44145,
-    "review_comments_count": 9,
+    "number": 44185,
+    "review_comments_count": 19,
     "state": "closed",
-    "title": "Add THD support in ESM",
-    "updated_at": "2026-04-09T14:40:26Z"
+    "title": "[docs] trainer part 1",
+    "updated_at": "2026-02-24T21:18:42Z"
   },
   {
-    "additions": 1481,
-    "author": "TinderZ",
+    "additions": 191,
+    "author": "mariam851",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds 5 Chinese translations for common NLP task tutorials that were missing from the `docs/source/zh/tasks/` directory. The following files are added: - `tasks/sequence_classification.md` - \u6587\u672c\u5206\u7c7b - `tasks/tok\u2026",
-    "changed_files": 6,
+    "body_excerpt": "This PR implements the initial architecture for CircuitGPT (based on OpenAI's research), as discussed in #44121. Key implementations: SparseLinear: Custom layer with Top-K weight sparsity logic. CircuitGpt Components: Attention, MLP, and C\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44144",
-    "created_at": "2026-02-19T02:35:08Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44184",
+    "created_at": "2026-02-20T16:58:27Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44144/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44144",
+    "files_url": "https://github.com/huggingface/transformers/pull/44184/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44184",
+    "labels": [],
+    "merged": false,
+    "number": 44184,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "feat: add OpenAI CircuitGPT core architecture and sparse linear layers",
+    "updated_at": "2026-02-20T17:18:44Z"
+  },
+  {
+    "additions": 1,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Our code has some references to the `grouped_entities` arg to the token classification pipeline, but this is no longer usable. This PR cleans them up entirely! Fixes #44016",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44182",
+    "created_at": "2026-02-20T15:28:26Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44182/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44182",
     "labels": [],
     "merged": true,
-    "number": 44144,
+    "number": 44182,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] Add Chinese translations for common NLP task tutorials",
-    "updated_at": "2026-02-20T16:50:29Z"
+    "title": "Remove refs to grouped_entities",
+    "updated_at": "2026-02-24T16:07:24Z"
   },
   {
-    "additions": 2,
-    "author": "nightcityblade",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes minor typos in the `GenerationConfig` class docstring: - \"overriden\" \u2192 \"overridden\" - \"field that are\" \u2192 \"fields that are\" - \"Arg:\" \u2192 \"Args:\" (consistent with the rest of the docstring) No code changes, docum\u2026",
-    "changed_files": 1,
+    "additions": 898,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title! Follow up of https://github.com/huggingface/transformers/pull/44130 and https://github.com/huggingface/transformers/pull/44226. Finally remove the `cache_position` everywhere (not ALL models, but a\u2026",
+    "changed_files": 169,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44143",
-    "created_at": "2026-02-18T23:07:23Z",
-    "deletions": 2,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44181",
+    "created_at": "2026-02-20T15:24:39Z",
+    "deletions": 2698,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44143/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44143",
+    "files_url": "https://github.com/huggingface/transformers/pull/44181/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44181",
     "labels": [],
     "merged": true,
-    "number": 44143,
-    "review_comments_count": 0,
+    "number": 44181,
+    "review_comments_count": 32,
     "state": "closed",
-    "title": "[docs] Fix typos in GenerationConfig docstring",
-    "updated_at": "2026-02-19T13:24:09Z"
+    "title": "[core] \ud83d\udea8 Completely remove cache positions",
+    "updated_at": "2026-03-04T18:08:42Z"
   },
   {
-    "additions": 72,
-    "author": "eustlb",
+    "additions": 28,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? so @Deep-unlearning noticed, benchmarking for the Open ASR leaderbaord, that the current implem is particularly slow. That would make sense since we go through every layer of the encoder forward, and that the stream\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Fixes a flaky test in IdeficsForVisionText2TextTest::test_generate_continue_from_inputs_embeds. The flakiness can be reproduced with: ``` pytest -q -p no:rerunfailures --flake-finder --flake-runs=20 \\ tests/models/i\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44142",
-    "created_at": "2026-02-18T21:44:11Z",
-    "deletions": 12,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44180",
+    "created_at": "2026-02-20T14:30:46Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44142/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44142",
+    "files_url": "https://github.com/huggingface/transformers/pull/44180/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44180",
     "labels": [],
-    "merged": false,
-    "number": 44142,
+    "merged": true,
+    "number": 44180,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[voxtral-realtime] get more perfs!",
-    "updated_at": "2026-02-23T17:25:45Z"
+    "state": "closed",
+    "title": "fix(flaky): idefics generate cache flake",
+    "updated_at": "2026-02-26T16:18:18Z"
   },
   {
-    "additions": 42,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44102 (original account: @fumadari). ## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of\u2026",
+    "additions": 27,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "Models with incorrect tokenizer_class in tokenization_config.json that should use TokenziersBackend",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44141",
-    "created_at": "2026-02-18T21:14:53Z",
-    "deletions": 154,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44179",
+    "created_at": "2026-02-20T13:51:44Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44141/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44141",
+    "files_url": "https://github.com/huggingface/transformers/pull/44179/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44179",
     "labels": [],
-    "merged": false,
-    "number": 44141,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44179,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Refactor ibert output tracing with capture_outputs",
-    "updated_at": "2026-02-22T02:28:47Z"
+    "title": "Models with incorrect tokenizer_class in tokenization_config.json tha\u2026",
+    "updated_at": "2026-02-23T08:33:13Z"
   },
   {
-    "additions": 66,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44104 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs`\u2026",
-    "changed_files": 1,
+    "additions": 2993,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Re-opening https://github.com/huggingface/transformers/pull/37868 TODO - [x] recompute expected outputs - [x] passthrough code given new conventions - [x] check for unused code paths / configuration parameters Origi\u2026",
+    "changed_files": 27,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44140",
-    "created_at": "2026-02-18T21:14:50Z",
-    "deletions": 207,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44178",
+    "created_at": "2026-02-20T12:36:21Z",
+    "deletions": 50,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44140/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44140",
+    "files_url": "https://github.com/huggingface/transformers/pull/44178/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44178",
     "labels": [],
     "merged": false,
-    "number": 44140,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor megatron_bert to use automatic output tracing",
-    "updated_at": "2026-02-22T02:28:48Z"
+    "number": 44178,
+    "review_comments_count": 9,
+    "state": "open",
+    "title": "Add xcodec2 model",
+    "updated_at": "2026-04-13T14:26:21Z"
   },
   {
-    "additions": 39,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44105 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
-    "changed_files": 1,
+    "additions": 41,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, spiritual successor to #44081 Why? Because as is - Only defaults for fa2/fa3, not on other requested kernels - Limits implementations to one kernel/implementation while I suspect that there will be multiple viable versions (i\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44139",
-    "created_at": "2026-02-18T21:14:46Z",
-    "deletions": 127,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44177",
+    "created_at": "2026-02-20T12:13:30Z",
+    "deletions": 71,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44139/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44139",
+    "files_url": "https://github.com/huggingface/transformers/pull/44177/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44177",
     "labels": [],
-    "merged": false,
-    "number": 44139,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44177,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Refactor lilt to use automatic output tracing",
-    "updated_at": "2026-02-22T02:28:48Z"
+    "title": "[`Flash Attn`] Enable compatible implementations",
+    "updated_at": "2026-02-20T12:43:35Z"
   },
   {
-    "additions": 51,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44106 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
-    "changed_files": 2,
+    "additions": 271,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Our kernel loading is incompatible with the original packages as they do not expose the same import structure: - Kernels seem to expose things in the init (and not in the original path) - Original packages seem to expose only within their\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44138",
-    "created_at": "2026-02-18T21:14:42Z",
-    "deletions": 132,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44176",
+    "created_at": "2026-02-20T11:36:01Z",
+    "deletions": 124,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44138/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44138",
+    "files_url": "https://github.com/huggingface/transformers/pull/44176/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44176",
     "labels": [],
-    "merged": false,
-    "number": 44138,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44176,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Refactor yoso to use automatic output tracing",
-    "updated_at": "2026-02-22T02:28:49Z"
+    "title": "[`Mamba`] Fix kernel loading",
+    "updated_at": "2026-02-20T16:19:06Z"
   },
   {
-    "additions": 43,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44107 (original account: @fumadari). ## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture\u2026",
+    "additions": 1,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "add jamba tokenizer mapping to PreTrainedTokenizerFast (v4/v5 BC) for vllm: https://buildkite.com/vllm/ci/builds/52260/steps/canvas?sid=019c76ad-c8f2-4e59-a2f4-5f3b5bbc204c&tab=output",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44137",
-    "created_at": "2026-02-18T21:14:39Z",
-    "deletions": 113,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44175",
+    "created_at": "2026-02-20T11:00:18Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44137/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44137",
+    "files_url": "https://github.com/huggingface/transformers/pull/44175/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44175",
     "labels": [],
     "merged": false,
-    "number": 44137,
+    "number": 44175,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor(mra): use output tracing decorators",
-    "updated_at": "2026-02-22T02:28:50Z"
+    "title": "add jamba tokenizer mapping to PreTrainedTokenizerFast (v4/v5 BC)",
+    "updated_at": "2026-02-20T16:19:31Z"
   },
   {
-    "additions": 37,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44108 (original account: @fumadari). ## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_\u2026",
-    "changed_files": 2,
+    "additions": 1367,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "This draft expands `utils/check_modeling_structure.py` into a rule-driven linter for model code, with new checks and tests, while keeping runtime very low. Key features: - The checker is intentionally AST-only (no heavy imports/execution),\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44136",
-    "created_at": "2026-02-18T21:14:35Z",
-    "deletions": 86,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44174",
+    "created_at": "2026-02-20T10:38:11Z",
+    "deletions": 24,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44136/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44136",
+    "files_url": "https://github.com/huggingface/transformers/pull/44174/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44174",
     "labels": [],
-    "merged": false,
-    "number": 44136,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44174,
+    "review_comments_count": 38,
     "state": "closed",
-    "title": "refactor(vitdet): use output tracing decorators",
-    "updated_at": "2026-02-22T02:28:50Z"
+    "title": "Expand model-structure lint rules with a fast AST-based, ruff-like framework",
+    "updated_at": "2026-03-12T06:42:21Z"
   },
   {
-    "additions": 48,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44109 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `H\u2026",
-    "changed_files": 2,
+    "additions": 20,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes flaky GLM OCR generation behavior when 2D `position_ids` are passed explicitly. Reproducible locally with: ``` pytest tests/models/glm_ocr/test_modeling_glm_ocr.py::GlmOcrModelTest::test_generate_with_and_without_position_ids --flake\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44135",
-    "created_at": "2026-02-18T21:14:31Z",
-    "deletions": 87,
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44173",
+    "created_at": "2026-02-20T09:28:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44135/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44135",
+    "files_url": "https://github.com/huggingface/transformers/pull/44173/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44173",
     "labels": [],
-    "merged": false,
-    "number": 44135,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44173,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
-    "updated_at": "2026-02-22T02:28:51Z"
+    "title": "fix(flaky): `test_generate_with_and_without_position_ids` in GLM ORC",
+    "updated_at": "2026-02-20T19:06:19Z"
   },
   {
-    "additions": 28,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44110 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators\u2026",
+    "additions": 2,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary I've noticed `test_synthidtext_watermark_processor_distributional_convergence_*` was our slowest tests in CircleCI This PR speeds up the slowest SynthID distributional convergence tests by optimizing a hot path in SynthIDTextWat\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44134",
-    "created_at": "2026-02-18T21:14:27Z",
-    "deletions": 101,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44172",
+    "created_at": "2026-02-20T08:59:30Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44134/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44134",
+    "files_url": "https://github.com/huggingface/transformers/pull/44172/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44172",
     "labels": [],
-    "merged": false,
-    "number": 44134,
+    "merged": true,
+    "number": 44172,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor(tvp): use capture_outputs for output tracing",
-    "updated_at": "2026-02-22T02:28:51Z"
+    "title": "perf: Optimize SynthID logits processor batch index construction",
+    "updated_at": "2026-02-27T09:32:43Z"
   },
   {
-    "additions": 30,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44111 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` dec\u2026",
-    "changed_files": 1,
+    "additions": 2449,
+    "author": "lmaksym",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds TDT decoder support for Parakeet ASR models, extending the existing CTC-only implementation. It incorporates the initial TDT integration work from [#41545](https://github.com/huggingface/transformers/pu\u2026",
+    "changed_files": 28,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44133",
-    "created_at": "2026-02-18T21:12:22Z",
-    "deletions": 59,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44171",
+    "created_at": "2026-02-20T08:44:46Z",
+    "deletions": 294,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44133/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44133",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44171/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44171",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
     "merged": false,
-    "number": 44133,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(poolformer): use capture_outputs for output tracing",
-    "updated_at": "2026-02-22T02:28:52Z"
+    "number": 44171,
+    "review_comments_count": 70,
+    "state": "open",
+    "title": "Parakeet tdt",
+    "updated_at": "2026-04-13T13:42:38Z"
   },
   {
-    "additions": 13,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? #43674 broke voxtral processor",
-    "changed_files": 1,
+    "additions": 74,
+    "author": "veeceey",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Adds `GitForCausalLM` to `MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES` so GIT can be used with the `visual-question-answering` pipeline - Filters tokenizer outputs in VQA pipeline `preprocess` to only pass keys accepted\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44132",
-    "created_at": "2026-02-18T20:13:15Z",
-    "deletions": 34,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44170",
+    "created_at": "2026-02-20T08:28:05Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44132/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44132",
+    "files_url": "https://github.com/huggingface/transformers/pull/44170/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44170",
     "labels": [],
-    "merged": true,
-    "number": 44132,
-    "review_comments_count": 6,
+    "merged": false,
+    "number": 44170,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[voxtral] fix voxtral proc",
-    "updated_at": "2026-02-19T16:41:53Z"
+    "title": "Add GIT model support in VQA pipeline",
+    "updated_at": "2026-02-20T09:34:31Z"
   },
   {
-    "additions": 2,
-    "author": "cluster2600",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What Two small corrections in `docs/source/en/quantization/overview.md`: 1. **Typo fix**: `AuoQuant Notebook` \u2192 `AutoQuant Notebook` in the *User-Friendly Quantization Tools* section. The letter `t` was missing from the link text. 2. **\u2026",
-    "changed_files": 1,
+    "additions": 415,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "Initial ty integration. To avoid a gigantic, risky patch, let's start with a baby step where we add the tooling to make repo-check and activate it on a subset of the repo. That gives us a human-readable patch, and allows us to get conforta\u2026",
+    "changed_files": 25,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44131",
-    "created_at": "2026-02-18T19:25:52Z",
-    "deletions": 2,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44167",
+    "created_at": "2026-02-20T07:39:44Z",
+    "deletions": 210,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44131/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44131",
+    "files_url": "https://github.com/huggingface/transformers/pull/44167/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44167",
     "labels": [],
     "merged": true,
-    "number": 44131,
-    "review_comments_count": 0,
+    "number": 44167,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "docs: fix typo 'AuoQuant' \u2192 'AutoQuant' and clarify FINEGRAINED_FP8 library column",
-    "updated_at": "2026-02-18T20:49:47Z"
+    "title": "chore(typing): initial ty integration",
+    "updated_at": "2026-02-20T19:08:09Z"
   },
   {
-    "additions": 302,
-    "author": "Cyrilvallez",
+    "additions": 73,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. This PR is the first big step towards removing the `cache_position` everywhere, as they are not needed in general and everything can be inferred from the cache itself. The major changes are the fol\u2026",
-    "changed_files": 23,
+    "body_excerpt": "The CI does not output useful info on this flaky test - `tests.models.olmo.test_modeling_olmo.OlmoModelTest testMethod=test_generate_with_static_cache` and makes it harder to determine the root problem when not reproducible locally. This p\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44130",
-    "created_at": "2026-02-18T11:58:54Z",
-    "deletions": 886,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44166",
+    "created_at": "2026-02-20T07:20:15Z",
+    "deletions": 61,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44130/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44130",
+    "files_url": "https://github.com/huggingface/transformers/pull/44166/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44166",
     "labels": [],
     "merged": true,
-    "number": 44130,
-    "review_comments_count": 16,
+    "number": 44166,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "[generate] Completely stop relying on `cache_position` to prepare inputs",
-    "updated_at": "2026-02-20T18:46:19Z"
+    "title": "Improve `has_similar_generate_outputs` assertions",
+    "updated_at": "2026-02-27T08:26:13Z"
   },
   {
-    "additions": 76,
-    "author": "preetam1407",
+    "additions": 29,
+    "author": "alexmalyshev",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors SpeechT5 to the standardized output tracing interface. - Adds `@capture_outputs` to base encoder/decoder forwards. - Adds `_can_record_outputs` mappings for hidden states and attentions. - Adds `@can_retur\u2026",
+    "body_excerpt": "With Python 3.14 and PEP 649, you can no longer expect `cls.__dict__[\"__annotations__\"]` to contain annotations for the exact class, it will be loaded lazily and can make it seem like the class doesn't have any annotations. The recommended\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44129",
-    "created_at": "2026-02-18T11:24:13Z",
-    "deletions": 222,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44165",
+    "created_at": "2026-02-20T04:47:07Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44129/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44129",
+    "files_url": "https://github.com/huggingface/transformers/pull/44165/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44165",
     "labels": [],
     "merged": false,
-    "number": 44129,
+    "number": 44165,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor SpeechT5 output tracing to standardized output capture",
-    "updated_at": "2026-02-18T11:25:19Z"
+    "state": "closed",
+    "title": "Fix how PreTrainedModel checks annotations on Python 3.14+",
+    "updated_at": "2026-03-20T17:31:11Z"
   },
   {
-    "additions": 59,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Just makes sure we trigger dev version update",
+    "additions": 7,
+    "author": "lhallee",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes: https://github.com/huggingface/transformers/issues/44162 @ArthurZucker @Cyrilvallez",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44128",
-    "created_at": "2026-02-18T10:42:21Z",
-    "deletions": 5,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44163",
+    "created_at": "2026-02-19T21:44:25Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44128/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44128",
+    "files_url": "https://github.com/huggingface/transformers/pull/44163/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44163",
     "labels": [],
-    "merged": false,
-    "number": 44128,
-    "review_comments_count": 4,
+    "merged": true,
+    "number": 44163,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "update release workflow",
-    "updated_at": "2026-03-30T13:40:19Z"
+    "title": "ESM2 attention_mask and token_dropout fix",
+    "updated_at": "2026-02-20T15:17:31Z"
   },
   {
-    "additions": 3,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "when the model_type isn't in `TOKENIZER_MAPPING_NAMES` (ex. \"llama\"), `TOKENIZER_MAPPING_NAMES.get(\"llama\", \"\")` --> \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026",
-    "changed_files": 3,
+    "additions": 379,
+    "author": "cogniera",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "What does this PR do? This PR refactors the LongT5 model to use the @capture_outputs and @can_return_tuple decorators for standardized output handling across the model stack. The refactor removes manual handling of: output_attentions outpu\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44127",
-    "created_at": "2026-02-18T10:41:48Z",
-    "deletions": 8,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44161",
+    "created_at": "2026-02-19T20:46:49Z",
+    "deletions": 170,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44127/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44127",
+    "files_url": "https://github.com/huggingface/transformers/pull/44161/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44161",
     "labels": [],
-    "merged": true,
-    "number": 44127,
+    "merged": false,
+    "number": 44161,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "AutoTokenizer ignores config when model_type is None",
-    "updated_at": "2026-02-18T14:47:52Z"
+    "state": "open",
+    "title": "Refactor LongT5 to use @capture_outputs and @can_return_tuple decorators for unified output handling (Fixes #43979)",
+    "updated_at": "2026-02-20T17:28:12Z"
   },
   {
-    "additions": 17,
-    "author": "Cyrilvallez",
+    "additions": 2104,
+    "author": "molbap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Let's simplify after https://github.com/huggingface/transformers/pull/42848",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Hey yall, I started porting the pi0 model so Transformers can be a backend for VLAs directly. I checked it against LeRobot on fix/lerobot_openpi: outputs seem to match and for sure lerobot/pi0_base loads cleanly (no\u2026",
+    "changed_files": 22,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44126",
-    "created_at": "2026-02-18T09:58:49Z",
-    "deletions": 40,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44160",
+    "created_at": "2026-02-19T17:16:29Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44126/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44126",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44160/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44160",
+    "labels": [
+      "New model"
+    ],
     "merged": true,
-    "number": 44126,
-    "review_comments_count": 0,
+    "number": 44160,
+    "review_comments_count": 32,
     "state": "closed",
-    "title": "Simplify input preparation in generate",
-    "updated_at": "2026-02-18T10:30:48Z"
+    "title": "Add model lerobot PI0 to transformers",
+    "updated_at": "2026-03-16T10:23:14Z"
   },
   {
-    "additions": 8,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43986",
-    "changed_files": 1,
+    "additions": 67,
+    "author": "samuelleecong",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Closes #28103 - Refactor `OwlViTAttention` to use `ALL_ATTENTION_FUNCTIONS` for dynamic attention backend dispatch (same pattern as CLIP) - Add `eager_attention_forward` standalone function with the standardized interface - Resh\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44125",
-    "created_at": "2026-02-18T09:34:54Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44159",
+    "created_at": "2026-02-19T16:31:44Z",
+    "deletions": 61,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44125/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44125",
+    "files_url": "https://github.com/huggingface/transformers/pull/44159/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44159",
     "labels": [],
-    "merged": true,
-    "number": 44125,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Raise informative error when loading video processors",
-    "updated_at": "2026-02-20T08:23:35Z"
+    "merged": false,
+    "number": 44159,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add SDPA and Flash Attention support for OWL-ViT",
+    "updated_at": "2026-02-24T12:53:10Z"
   },
   {
-    "additions": 10,
-    "author": "mariam851",
+    "additions": 141,
+    "author": "leopold-tzafon",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to force evaluation at the end of training, even if the last step doesn't align with eval_steps. Changes: training_args.py: Added eval_on_end field. trainer.py: Added logic to call evaluat\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# Fix issue where `use_cache=False`, corrupts model Qwen3vl output. Tested with: ``` import torch from transformers import Qwen3VLForConditionalGeneration, AutoProcessor MODEL_NAME = \"Qwen/Qwen3-VL-4B-Instruct\" DEVICE = \"cuda\" DTYPE = torc\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44124",
-    "created_at": "2026-02-18T08:52:23Z",
-    "deletions": 0,
+    "comments_count": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44158",
+    "created_at": "2026-02-19T15:45:13Z",
+    "deletions": 36,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44124/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44124",
+    "files_url": "https://github.com/huggingface/transformers/pull/44158/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44158",
     "labels": [],
-    "merged": false,
-    "number": 44124,
+    "merged": true,
+    "number": 44158,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "feat: add eval_on_end to Trainer for final evaluation",
-    "updated_at": "2026-02-18T14:14:16Z"
+    "title": "fix bug with position_ids on qwen3-vl models, such that position_ids include text position",
+    "updated_at": "2026-02-23T14:53:33Z"
   },
   {
-    "additions": 33,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR avoids device sync in training loss accumulation by ```torch.where```. The `is_torch_xla_available` condition is also removed.",
-    "changed_files": 1,
+    "additions": 689,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, gets rid of `if/else` per attn implementation",
+    "changed_files": 24,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44123",
-    "created_at": "2026-02-18T08:22:57Z",
-    "deletions": 22,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44157",
+    "created_at": "2026-02-19T14:49:49Z",
+    "deletions": 834,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44123/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44123",
+    "files_url": "https://github.com/huggingface/transformers/pull/44157/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44157",
     "labels": [],
     "merged": false,
-    "number": 44123,
-    "review_comments_count": 0,
+    "number": 44157,
+    "review_comments_count": 10,
     "state": "open",
-    "title": "Avoid device sync in training loss accumulation",
-    "updated_at": "2026-03-30T07:57:16Z"
+    "title": "Use correct mask for packed inputs in Qwen-VL ",
+    "updated_at": "2026-02-24T13:13:43Z"
   },
   {
-    "additions": 158,
-    "author": "adityuhkapoor",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds 4-bit embedding quantization for BitsAndBytes, mirroring TorchAO's existing `include_input_output_embeddings` and `untie_embedding_weights` pattern (PRs #37802, #37905, #37935). Large-vocabulary models (Llama 3\u2026",
-    "changed_files": 4,
+    "additions": 2,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fixes a minor error when using aqml quantization. We specified the wrong argument.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44122",
-    "created_at": "2026-02-18T06:35:09Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44156",
+    "created_at": "2026-02-19T14:35:38Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44122/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44122",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44156/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44156",
+    "labels": [],
     "merged": false,
-    "number": 44122,
+    "number": 44156,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add BnB 4-bit embedding quantization support",
-    "updated_at": "2026-02-18T14:27:25Z"
+    "title": "Fix aqml `modules_to_not_convert`",
+    "updated_at": "2026-03-27T16:50:02Z"
   },
   {
-    "additions": 14,
-    "author": "tirth8205",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #34920 After applying `normalize()`, images can have negative values. Calling `resize()` on such images fails because it internally converts to PIL, which requires values in [0, 1] or [0, 255]. ### Fix When the image has values outsi\u2026",
+    "additions": 44,
+    "author": "Aatman09",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44120",
-    "created_at": "2026-02-17T23:56:48Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44120/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44120",
-    "labels": [
-      "Code agent slop"
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
     ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44154",
+    "created_at": "2026-02-19T12:17:56Z",
+    "deletions": 52,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44154/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44154",
+    "labels": [],
     "merged": false,
-    "number": 44120,
+    "number": 44154,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: allow image_transforms.resize to handle negative values after normalization",
-    "updated_at": "2026-02-18T14:08:54Z"
+    "state": "open",
+    "title": "Refactored vits to match standardized output collection interface",
+    "updated_at": "2026-02-19T12:18:56Z"
   },
   {
-    "additions": 1,
-    "author": "tirth8205",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44117 `TOKENIZER_MAPPING_NAMES.get(config_model_type, \"\")` returns `None` when the key exists with value `None`, causing `AttributeError: 'NoneType' object has no attribute 'replace'` when loading models like `google/siglip2-so400m-\u2026",
-    "changed_files": 1,
+    "additions": 79,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44119",
-    "created_at": "2026-02-17T23:53:20Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44152",
+    "created_at": "2026-02-19T09:37:51Z",
+    "deletions": 45,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44119/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44119",
+    "files_url": "https://github.com/huggingface/transformers/pull/44152/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44152",
     "labels": [],
-    "merged": false,
-    "number": 44119,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44152,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "fix: handle None value from TOKENIZER_MAPPING_NAMES.get() in AutoTokenizer",
-    "updated_at": "2026-02-18T14:04:47Z"
+    "title": "AutoGrad support for grouped_mm fallback",
+    "updated_at": "2026-02-20T11:15:23Z"
   },
   {
-    "additions": 32,
-    "author": "tirth8205",
+    "additions": 58,
+    "author": "ManasVardhan",
     "author_association": "NONE",
-    "body_excerpt": "## Fix Fixes #44079 When a `ModelOutput` dataclass field is initialized as `None`, it is correctly excluded from the OrderedDict keys. However, **subsequently setting that field to a non-None value** via attribute assignment (e.g. `outputs\u2026",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the BioGPT m\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44118",
-    "created_at": "2026-02-17T23:31:31Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44151",
+    "created_at": "2026-02-19T06:55:43Z",
+    "deletions": 134,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44118/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44118",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44151/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44151",
+    "labels": [],
     "merged": false,
-    "number": 44118,
+    "number": 44151,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: ModelOutput keys not updated when setting previously-None dataclass fields",
-    "updated_at": "2026-02-18T14:18:12Z"
+    "title": "Refactor BioGPT output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:01Z"
   },
   {
-    "additions": 27,
-    "author": "dtiourine",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Migrate Flaubert to the @capture_outputs and @can_return_tuple decorator pattern for output handling, as part of #43979. # What does this PR do? - Add `_can_record_outputs = {\"attentions\": MultiHeadAttention}` on `FlaubertPreTrainedModel`\u2026",
+    "additions": 22,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the MPT mode\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44116",
-    "created_at": "2026-02-17T21:52:13Z",
-    "deletions": 102,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44150",
+    "created_at": "2026-02-19T06:54:09Z",
+    "deletions": 73,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44116/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44116",
+    "files_url": "https://github.com/huggingface/transformers/pull/44150/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44150",
     "labels": [],
     "merged": false,
-    "number": 44116,
+    "number": 44150,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[WIP] [Flaubert] Refactor output tracing to decorator-based interface",
-    "updated_at": "2026-02-17T21:53:23Z"
+    "state": "closed",
+    "title": "Refactor MPT output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:02Z"
   },
   {
-    "additions": 2,
-    "author": "Deep-unlearning",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary - Fix broken `[chat template](./chat_templating)` links in `docs/source/en/tasks/` - `./chat_templating` resolves within `tasks/` (doesn't exist); corrected to `../chat_templating` - Affected files: `tasks/image_text_to_text.md`\u2026",
-    "changed_files": 2,
+    "additions": 85,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CPMAnt m\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44115",
-    "created_at": "2026-02-17T21:32:55Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44149",
+    "created_at": "2026-02-19T06:51:06Z",
+    "deletions": 201,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44115/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44115",
+    "files_url": "https://github.com/huggingface/transformers/pull/44149/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44149",
     "labels": [],
-    "merged": true,
-    "number": 44115,
+    "merged": false,
+    "number": 44149,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] fix broken chat_templating links in tasks docs",
-    "updated_at": "2026-02-23T16:27:57Z"
+    "title": "Refactor CPMAnt output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:03Z"
   },
   {
-    "additions": 716,
-    "author": "23atharvaS",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary This PR migrates the `wav2vec2` family to the standardized output-capturing interface (`@capture_outputs` + `@can_return_tuple`) and includes follow-up compatibility fixes required to make full CI green. ## What changed ### Core\u2026",
-    "changed_files": 19,
+    "additions": 33,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the Bros mod\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44114",
-    "created_at": "2026-02-17T21:17:35Z",
-    "deletions": 1237,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44148",
+    "created_at": "2026-02-19T06:46:24Z",
+    "deletions": 124,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44114/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44114",
+    "files_url": "https://github.com/huggingface/transformers/pull/44148/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44148",
     "labels": [],
     "merged": false,
-    "number": 44114,
+    "number": 44148,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Migrate wav2vec2, wav2vec2_conformer, and wav2vec2_bert to standardized output collection decorators",
-    "updated_at": "2026-02-18T20:34:53Z"
+    "state": "closed",
+    "title": "Refactor Bros output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:05Z"
   },
   {
-    "additions": 5,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Updates the stale `test_device_override` in `test_processing_granite_speech.py` to verify that the device param controls where speech inputs are placed, r\u2026",
+    "additions": 11,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CTRL mod\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44113",
-    "created_at": "2026-02-17T20:01:32Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44147",
+    "created_at": "2026-02-19T06:45:32Z",
+    "deletions": 47,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44113/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44113",
+    "files_url": "https://github.com/huggingface/transformers/pull/44147/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44147",
     "labels": [],
-    "merged": true,
-    "number": 44113,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 44147,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(testing): Update stale device override test in GraniteSpeech",
-    "updated_at": "2026-02-19T11:24:29Z"
+    "title": "Refactor CTRL output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:06Z"
   },
   {
-    "additions": 30,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `PoolFormerLayer` to return a single tensor instead of a 1-tuple - Simplifies `\u2026",
-    "changed_files": 1,
+    "additions": 38,
+    "author": "khushali9",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? When using a step-based evaluation strategy (IntervalStrategy.STEPS), the trainer may skip evaluation at the final step if the last step does not align with eval_steps. This avoids missing the final evaluation while\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44111",
-    "created_at": "2026-02-17T19:38:02Z",
-    "deletions": 59,
+    "comments_count": 18,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44146",
+    "created_at": "2026-02-19T05:29:21Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44111/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44111",
+    "files_url": "https://github.com/huggingface/transformers/pull/44146/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44146",
     "labels": [],
-    "merged": false,
-    "number": 44111,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44146,
+    "review_comments_count": 14,
     "state": "closed",
-    "title": "refactor(poolformer): use capture_outputs for output tracing",
-    "updated_at": "2026-02-18T21:19:22Z"
+    "title": "Ensure final evaluation runs with step-based evaluation strategy",
+    "updated_at": "2026-03-26T16:30:40Z"
   },
   {
-    "additions": 28,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `TvpAttention` to always return `(output, attention_probs)` (hooks decide what to capt\u2026",
-    "changed_files": 1,
+    "additions": 400,
+    "author": "balvisio",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for sequence packing in the ESM2 model. Currently, the RotaryEmbedding class of the ESM2 model supports BSHD format. This PR makes the RotayEmbedding class aware of the`position_ids` and builds\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44110",
-    "created_at": "2026-02-17T19:32:55Z",
-    "deletions": 101,
+    "comments_count": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44145",
+    "created_at": "2026-02-19T02:58:50Z",
+    "deletions": 216,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44110/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44110",
+    "files_url": "https://github.com/huggingface/transformers/pull/44145/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44145",
     "labels": [],
-    "merged": false,
-    "number": 44110,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44145,
+    "review_comments_count": 9,
     "state": "closed",
-    "title": "refactor(tvp): use capture_outputs for output tracing",
-    "updated_at": "2026-02-18T21:19:24Z"
+    "title": "Add THD support in ESM",
+    "updated_at": "2026-04-09T14:40:26Z"
   },
   {
-    "additions": 48,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `HGNetV2Encoder` by removing `return_dict` parameter (always returns `BaseModelOutputWithNoAttention`)\u2026",
-    "changed_files": 2,
+    "additions": 1481,
+    "author": "TinderZ",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds 5 Chinese translations for common NLP task tutorials that were missing from the `docs/source/zh/tasks/` directory. The following files are added: - `tasks/sequence_classification.md` - \u6587\u672c\u5206\u7c7b - `tasks/tok\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44109",
-    "created_at": "2026-02-17T19:23:03Z",
-    "deletions": 87,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44144",
+    "created_at": "2026-02-19T02:35:08Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44109/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44109",
+    "files_url": "https://github.com/huggingface/transformers/pull/44144/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44144",
     "labels": [],
-    "merged": false,
-    "number": 44109,
+    "merged": true,
+    "number": 44144,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
-    "updated_at": "2026-02-18T21:19:25Z"
+    "title": "[docs] Add Chinese translations for common NLP task tutorials",
+    "updated_at": "2026-02-20T16:50:29Z"
   },
   {
-    "additions": 33,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_attentions`/`return_dict` resolution - Adds `_can_record_outputs = {\"attentions\": VitDetAttention}`\u2026",
+    "additions": 2,
+    "author": "nightcityblade",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes minor typos in the `GenerationConfig` class docstring: - \"overriden\" \u2192 \"overridden\" - \"field that are\" \u2192 \"fields that are\" - \"Arg:\" \u2192 \"Args:\" (consistent with the rest of the docstring) No code changes, docum\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44108",
-    "created_at": "2026-02-17T19:15:00Z",
-    "deletions": 82,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44143",
+    "created_at": "2026-02-18T23:07:23Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44108/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44108",
+    "files_url": "https://github.com/huggingface/transformers/pull/44143/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44143",
     "labels": [],
-    "merged": false,
-    "number": 44108,
+    "merged": true,
+    "number": 44143,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor(vitdet): use output tracing decorators",
-    "updated_at": "2026-02-18T21:19:27Z"
+    "title": "[docs] Fix typos in GenerationConfig docstring",
+    "updated_at": "2026-02-19T13:24:09Z"
   },
   {
-    "additions": 40,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture_outputs` decorators - Simplifies `MraEncoder` to a plain loop returning a single tensor, removing `\u2026",
-    "changed_files": 1,
+    "additions": 72,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? so @Deep-unlearning noticed, benchmarking for the Open ASR leaderbaord, that the current implem is particularly slow. That would make sense since we go through every layer of the encoder forward, and that the stream\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44107",
-    "created_at": "2026-02-17T19:04:42Z",
-    "deletions": 112,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44142",
+    "created_at": "2026-02-18T21:44:11Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44107/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44107",
+    "files_url": "https://github.com/huggingface/transformers/pull/44142/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44142",
     "labels": [],
     "merged": false,
-    "number": 44107,
+    "number": 44142,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(mra): use output tracing decorators",
-    "updated_at": "2026-02-18T21:19:29Z"
+    "state": "open",
+    "title": "[voxtral-realtime] get more perfs!",
+    "updated_at": "2026-02-23T17:25:45Z"
   },
   {
-    "additions": 47,
-    "author": "fumadari",
+    "additions": 42,
+    "author": "dario-fumarola",
     "author_association": "NONE",
-    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 5 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44102 (original account: @fumadari). ## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44106",
-    "created_at": "2026-02-17T18:59:25Z",
-    "deletions": 132,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44141",
+    "created_at": "2026-02-18T21:14:53Z",
+    "deletions": 154,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44106/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44106",
+    "files_url": "https://github.com/huggingface/transformers/pull/44141/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44141",
     "labels": [],
     "merged": false,
-    "number": 44106,
+    "number": 44141,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor yoso to use automatic output tracing",
-    "updated_at": "2026-02-18T21:19:30Z"
+    "title": "Refactor ibert output tracing with capture_outputs",
+    "updated_at": "2026-02-22T02:28:47Z"
   },
   {
-    "additions": 39,
-    "author": "fumadari",
+    "additions": 66,
+    "author": "dario-fumarola",
     "author_association": "NONE",
-    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 3 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44104 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44105",
-    "created_at": "2026-02-17T18:54:40Z",
-    "deletions": 127,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44140",
+    "created_at": "2026-02-18T21:14:50Z",
+    "deletions": 207,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44105/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44105",
+    "files_url": "https://github.com/huggingface/transformers/pull/44140/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44140",
     "labels": [],
     "merged": false,
-    "number": 44105,
+    "number": 44140,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor lilt to use automatic output tracing",
-    "updated_at": "2026-02-18T21:19:32Z"
+    "title": "Refactor megatron_bert to use automatic output tracing",
+    "updated_at": "2026-02-22T02:28:48Z"
   },
   {
-    "additions": 66,
-    "author": "fumadari",
+    "additions": 39,
+    "author": "dario-fumarola",
     "author_association": "NONE",
-    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 8 wrapper model classes, eliminating m\u2026",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44105 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44104",
-    "created_at": "2026-02-17T18:43:44Z",
-    "deletions": 207,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44139",
+    "created_at": "2026-02-18T21:14:46Z",
+    "deletions": 127,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44104/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44104",
+    "files_url": "https://github.com/huggingface/transformers/pull/44139/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44139",
     "labels": [],
     "merged": false,
-    "number": 44104,
+    "number": 44139,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor megatron_bert to use automatic output tracing",
-    "updated_at": "2026-02-18T21:19:34Z"
+    "title": "Refactor lilt to use automatic output tracing",
+    "updated_at": "2026-02-22T02:28:48Z"
   },
   {
-    "additions": 53,
-    "author": "engmohamedsalah",
+    "additions": 51,
+    "author": "dario-fumarola",
     "author_association": "NONE",
-    "body_excerpt": "Fixes #44052 Now and then, the indexer ran into trouble switching between masks and cache. Most of the test failures came from these hiccups: - Indexer cache: the old if seq_len > 1: reset cache heuristic broke assisted decoding (multi-tok\u2026",
-    "changed_files": 3,
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44106 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44103",
-    "created_at": "2026-02-17T18:04:48Z",
-    "deletions": 76,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44138",
+    "created_at": "2026-02-18T21:14:42Z",
+    "deletions": 132,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44103/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44103",
+    "files_url": "https://github.com/huggingface/transformers/pull/44138/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44138",
     "labels": [],
     "merged": false,
-    "number": 44103,
+    "number": 44138,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix glm_moe_dsa",
-    "updated_at": "2026-02-18T19:38:11Z"
+    "title": "Refactor yoso to use automatic output tracing",
+    "updated_at": "2026-02-22T02:28:49Z"
   },
   {
-    "additions": 42,
-    "author": "fumadari",
+    "additions": 43,
+    "author": "dario-fumarola",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of the meta-issue #43979. **Key changes:** - Added `_can_record_outputs = {\"hidden_states\": IBertLayer,\u2026",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44107 (original account: @fumadari). ## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44102",
-    "created_at": "2026-02-17T17:21:32Z",
-    "deletions": 154,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44137",
+    "created_at": "2026-02-18T21:14:39Z",
+    "deletions": 113,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44102/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44102",
+    "files_url": "https://github.com/huggingface/transformers/pull/44137/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44137",
     "labels": [],
     "merged": false,
-    "number": 44102,
+    "number": 44137,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor ibert output tracing with capture_outputs",
-    "updated_at": "2026-02-18T21:19:35Z"
+    "title": "refactor(mra): use output tracing decorators",
+    "updated_at": "2026-02-22T02:28:50Z"
   },
   {
-    "additions": 210,
-    "author": "aman-coder03",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR refactors XLM's output tracing to align with the standardized output capturing patterns used across the codebase. ### Key changes: - Refactors transformer blocks into a dedicated `XLMLayer` module to enable\u2026",
+    "additions": 37,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44108 (original account: @fumadari). ## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44101",
-    "created_at": "2026-02-17T17:15:06Z",
-    "deletions": 194,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44136",
+    "created_at": "2026-02-18T21:14:35Z",
+    "deletions": 86,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44101/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44101",
+    "files_url": "https://github.com/huggingface/transformers/pull/44136/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44136",
     "labels": [],
     "merged": false,
-    "number": 44101,
+    "number": 44136,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[XLM] Refactor output tracing to align with capture_outputs standardized architecture",
-    "updated_at": "2026-02-19T08:08:33Z"
+    "state": "closed",
+    "title": "refactor(vitdet): use output tracing decorators",
+    "updated_at": "2026-02-22T02:28:50Z"
   },
   {
-    "additions": 3,
-    "author": "qgallouedec",
-    "author_association": "MEMBER",
-    "body_excerpt": "In https://github.com/huggingface/trl/pull/5112 a user reported that `trl sft --help` fails It's because three inherited args from `TrainingArguments` (`torch_empty_cache_steps`, `gradient_checkpointing` and `use_liger_kernel`)help strings\u2026",
-    "changed_files": 1,
+    "additions": 48,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44109 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `H\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44100",
-    "created_at": "2026-02-17T17:10:36Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44135",
+    "created_at": "2026-02-18T21:14:31Z",
+    "deletions": 87,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44100/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44100",
+    "files_url": "https://github.com/huggingface/transformers/pull/44135/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44135",
     "labels": [],
-    "merged": true,
-    "number": 44100,
+    "merged": false,
+    "number": 44135,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix percentage formatting in help messages for gradient checkpointing, Liger Kernel, and empty cache steps",
-    "updated_at": "2026-02-20T09:57:51Z"
+    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
+    "updated_at": "2026-02-22T02:28:51Z"
   },
   {
-    "additions": 2,
-    "author": "qgallouedec",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 28,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44110 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44099",
-    "created_at": "2026-02-17T16:45:35Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44134",
+    "created_at": "2026-02-18T21:14:27Z",
+    "deletions": 101,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44099/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44099",
+    "files_url": "https://github.com/huggingface/transformers/pull/44134/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44134",
     "labels": [],
-    "merged": true,
-    "number": 44099,
+    "merged": false,
+    "number": 44134,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Bump dev version",
-    "updated_at": "2026-02-18T10:03:54Z"
+    "title": "refactor(tvp): use capture_outputs for output tracing",
+    "updated_at": "2026-02-22T02:28:51Z"
   },
   {
-    "additions": 125,
-    "author": "aman-coder03",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR refactors ViLT's output handling to align with the standardized patterns used across the codebase. Key changes: - Removes manual `hidden_states`/`attentions` propagation and passes `output_attentions`, `out\u2026",
-    "changed_files": 2,
+    "additions": 30,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44111 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` dec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44098",
-    "created_at": "2026-02-17T16:32:34Z",
-    "deletions": 138,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44133",
+    "created_at": "2026-02-18T21:12:22Z",
+    "deletions": 59,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44098/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44098",
+    "files_url": "https://github.com/huggingface/transformers/pull/44133/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44133",
     "labels": [],
     "merged": false,
-    "number": 44098,
+    "number": 44133,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[ViLT] Refactor output handling to align with standardized patterns",
-    "updated_at": "2026-02-17T16:37:46Z"
+    "state": "closed",
+    "title": "refactor(poolformer): use capture_outputs for output tracing",
+    "updated_at": "2026-02-22T02:28:52Z"
   },
   {
-    "additions": 12,
-    "author": "Rocketknight1",
+    "additions": 13,
+    "author": "eustlb",
     "author_association": "MEMBER",
-    "body_excerpt": "The `test_keep_in_fp32_modules` issues in #44052 are because the test assumes a model has **either** `_keep_in_fp32_modules` or `_keep_in_fp32_modules_strict` **but not both.** The only model that uses both is `glm_moe_dsa`, so this is the\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? #43674 broke voxtral processor",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44097",
-    "created_at": "2026-02-17T15:43:55Z",
-    "deletions": 42,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44132",
+    "created_at": "2026-02-18T20:13:15Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44097/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44097",
+    "files_url": "https://github.com/huggingface/transformers/pull/44132/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44132",
     "labels": [],
     "merged": true,
-    "number": 44097,
-    "review_comments_count": 2,
+    "number": 44132,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Merge test_keep_in_fp32_modules and test_keep_in_fp32_modules_strict",
-    "updated_at": "2026-02-17T16:23:33Z"
+    "title": "[voxtral] fix voxtral proc",
+    "updated_at": "2026-02-19T16:41:53Z"
   },
   {
-    "additions": 3,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Tests were written (and pass) on DGX A100, here are the values for our runners.",
+    "additions": 2,
+    "author": "cluster2600",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What Two small corrections in `docs/source/en/quantization/overview.md`: 1. **Typo fix**: `AuoQuant Notebook` \u2192 `AutoQuant Notebook` in the *User-Friendly Quantization Tools* section. The letter `t` was missing from the link text. 2. **\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44096",
-    "created_at": "2026-02-17T15:14:26Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44131",
+    "created_at": "2026-02-18T19:25:52Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44096/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44096",
+    "files_url": "https://github.com/huggingface/transformers/pull/44131/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44131",
     "labels": [],
     "merged": true,
-    "number": 44096,
+    "number": 44131,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[voxtral-realtime] update runner expected values ",
-    "updated_at": "2026-02-17T15:23:19Z"
+    "title": "docs: fix typo 'AuoQuant' \u2192 'AutoQuant' and clarify FINEGRAINED_FP8 library column",
+    "updated_at": "2026-02-18T20:49:47Z"
   },
   {
-    "additions": 43,
+    "additions": 302,
     "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. The check that was added in https://github.com/huggingface/transformers/pull/43768 is wrong, as a missing weight would NOT be reinitialized in some cases! As for the pointers check, it is actually\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44095",
-    "created_at": "2026-02-17T14:33:22Z",
-    "deletions": 47,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44095/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44095",
-    "labels": [],
-    "merged": true,
-    "number": 44095,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Fix loading logic issue",
-    "updated_at": "2026-04-03T04:52:35Z"
-  },
-  {
-    "additions": 24,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. This PR is the first big step towards removing the `cache_position` everywhere, as they are not needed in general and everything can be inferred from the cache itself. The major changes are the fol\u2026",
+    "changed_files": 23,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44094",
-    "created_at": "2026-02-17T14:15:10Z",
-    "deletions": 70,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44130",
+    "created_at": "2026-02-18T11:58:54Z",
+    "deletions": 886,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44094/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44094",
+    "files_url": "https://github.com/huggingface/transformers/pull/44130/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44130",
     "labels": [],
-    "merged": false,
-    "number": 44094,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44130,
+    "review_comments_count": 16,
     "state": "closed",
-    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:07Z"
+    "title": "[generate] Completely stop relying on `cache_position` to prepare inputs",
+    "updated_at": "2026-02-20T18:46:19Z"
   },
   {
-    "additions": 28,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
+    "additions": 76,
+    "author": "preetam1407",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors SpeechT5 to the standardized output tracing interface. - Adds `@capture_outputs` to base encoder/decoder forwards. - Adds `_can_record_outputs` mappings for hidden states and attentions. - Adds `@can_retur\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
+    "cluster_id": "cluster-43979-24",
     "cluster_ids": [
-      "cluster-43979-21"
+      "cluster-43979-24"
     ],
     "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44093",
-    "created_at": "2026-02-17T14:15:07Z",
-    "deletions": 129,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44129",
+    "created_at": "2026-02-18T11:24:13Z",
+    "deletions": 222,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44093/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44093",
+    "files_url": "https://github.com/huggingface/transformers/pull/44129/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44129",
     "labels": [],
     "merged": false,
-    "number": 44093,
+    "number": 44129,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:09Z"
+    "state": "open",
+    "title": "Refactor SpeechT5 output tracing to standardized output capture",
+    "updated_at": "2026-02-18T11:25:19Z"
   },
   {
-    "additions": 79,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
+    "additions": 59,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Just makes sure we trigger dev version update",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44092",
-    "created_at": "2026-02-17T14:15:04Z",
-    "deletions": 159,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44092/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44092",
-    "labels": [],
-    "merged": false,
-    "number": 44092,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor output tracing for swinv2 model",
-    "updated_at": "2026-03-03T00:30:10Z"
-  },
-  {
-    "additions": 79,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44091",
-    "created_at": "2026-02-17T14:14:56Z",
-    "deletions": 146,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44128",
+    "created_at": "2026-02-18T10:42:21Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44091/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44091",
+    "files_url": "https://github.com/huggingface/transformers/pull/44128/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44128",
     "labels": [],
     "merged": false,
-    "number": 44091,
-    "review_comments_count": 0,
+    "number": 44128,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:11Z"
+    "title": "update release workflow",
+    "updated_at": "2026-03-30T13:40:19Z"
   },
   {
-    "additions": 25,
+    "additions": 3,
     "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "bos and eos behaviour should match when updating post processor setting `add_bos_token=True` when `bos_token=None` should silently disables `add_bos_token`. (was already the behavior for `eos_token`)",
-    "changed_files": 2,
+    "body_excerpt": "when the model_type isn't in `TOKENIZER_MAPPING_NAMES` (ex. \"llama\"), `TOKENIZER_MAPPING_NAMES.get(\"llama\", \"\")` --> \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44090",
-    "created_at": "2026-02-17T13:15:07Z",
-    "deletions": 4,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44127",
+    "created_at": "2026-02-18T10:41:48Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44090/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44090",
+    "files_url": "https://github.com/huggingface/transformers/pull/44127/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44127",
     "labels": [],
     "merged": true,
-    "number": 44090,
+    "number": 44127,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update post proc",
-    "updated_at": "2026-02-18T15:34:18Z"
+    "title": "AutoTokenizer ignores config when model_type is None",
+    "updated_at": "2026-02-18T14:47:52Z"
   },
   {
-    "additions": 113,
-    "author": "preetam1407",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #43979 ## Summary Refactor T5 to the standardized output tracing interface. ## Changes - Added `_can_record_outputs` on T5 encoder/decoder stack subclasses. - Added `@capture_outputs` on the base stack forward. - Added `@can_return_t\u2026",
+    "additions": 17,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Let's simplify after https://github.com/huggingface/transformers/pull/42848",
     "changed_files": 2,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44089",
-    "created_at": "2026-02-17T11:37:18Z",
-    "deletions": 294,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44126",
+    "created_at": "2026-02-18T09:58:49Z",
+    "deletions": 40,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44089/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44089",
+    "files_url": "https://github.com/huggingface/transformers/pull/44126/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44126",
     "labels": [],
-    "merged": false,
-    "number": 44089,
+    "merged": true,
+    "number": 44126,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor t5 output tracing",
-    "updated_at": "2026-02-17T13:45:23Z"
+    "title": "Simplify input preparation in generate",
+    "updated_at": "2026-02-18T10:30:48Z"
   },
   {
-    "additions": 41,
-    "author": "Zephyr-Blessed",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description Refactors GPT-2 model to use the standardized `@capture_outputs` and `@can_return_tuple` decorators, replacing manual output collection boilerplate. Part of #43979 ## Changes - **`GPT2PreTrainedModel`**: Added `_can_record_o\u2026",
+    "additions": 8,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43986",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44088",
-    "created_at": "2026-02-17T11:32:42Z",
-    "deletions": 129,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44125",
+    "created_at": "2026-02-18T09:34:54Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44088/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44088",
+    "files_url": "https://github.com/huggingface/transformers/pull/44125/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44125",
     "labels": [],
-    "merged": false,
-    "number": 44088,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44125,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Refactor GPT-2 output tracing with capture_outputs/can_return_tuple",
-    "updated_at": "2026-02-17T11:41:32Z"
+    "title": "Raise informative error when loading video processors",
+    "updated_at": "2026-02-20T08:23:35Z"
   },
   {
-    "additions": 16,
-    "author": "huyxdang",
-    "author_association": "NONE",
-    "body_excerpt": "### Summary Refactors the Mamba2 model to use the standardized output collection interface as part of #43979. ### Changes * **Standardized Output Mapping**: Added `_can_record_outputs` to `Mamba2PreTrainedModel` mapping `hidden_states` \u2192 `\u2026",
+    "additions": 10,
+    "author": "mariam851",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to force evaluation at the end of training, even if the last step doesn't align with eval_steps. Changes: training_args.py: Added eval_on_end field. trainer.py: Added logic to call evaluat\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44087",
-    "created_at": "2026-02-17T11:30:25Z",
-    "deletions": 33,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44124",
+    "created_at": "2026-02-18T08:52:23Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44087/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44087",
+    "files_url": "https://github.com/huggingface/transformers/pull/44124/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44124",
     "labels": [],
     "merged": false,
-    "number": 44087,
+    "number": 44124,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor Mamba2 to use standardized output tracing",
-    "updated_at": "2026-03-11T02:08:22Z"
+    "title": "feat: add eval_on_end to Trainer for final evaluation",
+    "updated_at": "2026-02-18T14:14:16Z"
   },
   {
-    "additions": 16,
-    "author": "Zephyr-Blessed",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Migrates **MGP-STR** to the standardized output collection interface using `@capture_outputs` and `@can_return_tuple` decorators. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": MgpstrLayer, \"attentio\u2026",
+    "additions": 33,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR avoids device sync in training loss accumulation by ```torch.where```. The `is_torch_xla_available` condition is also removed.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44086",
-    "created_at": "2026-02-17T11:21:22Z",
-    "deletions": 48,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44123",
+    "created_at": "2026-02-18T08:22:57Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44086/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44086",
+    "files_url": "https://github.com/huggingface/transformers/pull/44123/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44123",
     "labels": [],
     "merged": false,
-    "number": 44086,
+    "number": 44123,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[MGP-STR] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
-    "updated_at": "2026-02-17T11:22:25Z"
+    "title": "Avoid device sync in training loss accumulation",
+    "updated_at": "2026-03-30T07:57:16Z"
   },
   {
-    "additions": 37,
-    "author": "Zephyr-Blessed",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Refactors the RemBERT model to use the new output tracing decorators (`@capture_outputs` and `@can_return_tuple`), replacing manual output collection boilerplate. ### Changes: - Added `@capture_outputs` decorator t\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44085",
-    "created_at": "2026-02-17T11:09:55Z",
-    "deletions": 108,
+    "additions": 158,
+    "author": "adityuhkapoor",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Adds 4-bit embedding quantization for BitsAndBytes, mirroring TorchAO's existing `include_input_output_embeddings` and `untie_embedding_weights` pattern (PRs #37802, #37905, #37935). Large-vocabulary models (Llama 3\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44122",
+    "created_at": "2026-02-18T06:35:09Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44085/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44085",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44122/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44122",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44085,
+    "number": 44122,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor RemBERT to use output tracing decorators",
-    "updated_at": "2026-02-17T11:10:59Z"
+    "state": "closed",
+    "title": "Add BnB 4-bit embedding quantization support",
+    "updated_at": "2026-02-18T14:27:25Z"
   },
   {
-    "additions": 37,
-    "author": "Zephyr-Blessed",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Refactors the GPT-J model to use the new `capture_outputs` and `can_return_tuple` decorators for output tracing, following the same pattern as #44046 (CodeGen). ### Changes: - Added `@capture_outputs` decorator on\u2026",
+    "additions": 14,
+    "author": "tirth8205",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #34920 After applying `normalize()`, images can have negative values. Calling `resize()` on such images fails because it internally converts to PIL, which requires values in [0, 1] or [0, 255]. ### Fix When the image has values outsi\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44084",
-    "created_at": "2026-02-17T11:08:48Z",
-    "deletions": 108,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44120",
+    "created_at": "2026-02-17T23:56:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44084/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44084",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44120/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44120",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44084,
+    "number": 44120,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[GPT-J] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
-    "updated_at": "2026-02-17T11:41:38Z"
+    "title": "fix: allow image_transforms.resize to handle negative values after normalization",
+    "updated_at": "2026-02-18T14:08:54Z"
   },
   {
-    "additions": 2857,
-    "author": "3outeille",
-    "author_association": "MEMBER",
-    "body_excerpt": "- TODO: - fsdp => faire comme tp en mode fsdp_plan manual qui devient l'auto par d\u00e9faut --- This PR introduces **first-class FSDP2 (Fully Sharded Data Parallel v2) support** directly in Transformers, bypassing the need for Accelerate's FSD\u2026",
-    "changed_files": 98,
+    "additions": 1,
+    "author": "tirth8205",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44117 `TOKENIZER_MAPPING_NAMES.get(config_model_type, \"\")` returns `None` when the key exists with value `None`, causing `AttributeError: 'NoneType' object has no attribute 'replace'` when loading models like `google/siglip2-so400m-\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44083",
-    "created_at": "2026-02-17T10:57:06Z",
-    "deletions": 201,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44119",
+    "created_at": "2026-02-17T23:53:20Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44083/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44083",
+    "files_url": "https://github.com/huggingface/transformers/pull/44119/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44119",
     "labels": [],
     "merged": false,
-    "number": 44083,
-    "review_comments_count": 24,
-    "state": "open",
-    "title": "FSDP2 native support in transformers ",
-    "updated_at": "2026-04-10T08:35:55Z"
+    "number": 44119,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: handle None value from TOKENIZER_MAPPING_NAMES.get() in AutoTokenizer",
+    "updated_at": "2026-02-18T14:04:47Z"
   },
   {
-    "additions": 6,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44077. Indeed, the call is not optional. This is slightly breaking as the defaut used to be False, so fresh model instantiation will now use a different init\u2026",
-    "changed_files": 3,
+    "additions": 32,
+    "author": "tirth8205",
+    "author_association": "NONE",
+    "body_excerpt": "## Fix Fixes #44079 When a `ModelOutput` dataclass field is initialized as `None`, it is correctly excluded from the OrderedDict keys. However, **subsequently setting that field to a non-None value** via attribute assignment (e.g. `outputs\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44082",
-    "created_at": "2026-02-17T10:09:03Z",
-    "deletions": 20,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44118",
+    "created_at": "2026-02-17T23:31:31Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44082/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44082",
-    "labels": [],
-    "merged": true,
-    "number": 44082,
+    "files_url": "https://github.com/huggingface/transformers/pull/44118/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44118",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44118,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix patchtsmixer call to post_init",
-    "updated_at": "2026-02-17T11:05:40Z"
+    "title": "fix: ModelOutput keys not updated when setting previously-None dataclass fields",
+    "updated_at": "2026-02-18T14:18:12Z"
   },
   {
-    "additions": 48,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes #42533 by introducing default flash implementations. cc @vasqu and @cyrilvallez",
-    "changed_files": 6,
+    "additions": 27,
+    "author": "dtiourine",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Migrate Flaubert to the @capture_outputs and @can_return_tuple decorator pattern for output handling, as part of #43979. # What does this PR do? - Add `_can_record_outputs = {\"attentions\": MultiHeadAttention}` on `FlaubertPreTrainedModel`\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44081",
-    "created_at": "2026-02-17T09:54:01Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44116",
+    "created_at": "2026-02-17T21:52:13Z",
+    "deletions": 102,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44081/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44081",
+    "files_url": "https://github.com/huggingface/transformers/pull/44116/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44116",
     "labels": [],
-    "merged": true,
-    "number": 44081,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "add default flash impl",
-    "updated_at": "2026-02-19T11:29:54Z"
+    "merged": false,
+    "number": 44116,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[WIP] [Flaubert] Refactor output tracing to decorator-based interface",
+    "updated_at": "2026-02-17T21:53:23Z"
   },
   {
-    "additions": 22,
-    "author": "tomaarsen",
+    "additions": 2,
+    "author": "Deep-unlearning",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None` Fixes #44079, follow-up from #44050. Essentially, it brings behaviour to the expected as described in #44079: > If I 1) initialize a\u2026",
+    "body_excerpt": "## Summary - Fix broken `[chat template](./chat_templating)` links in `docs/source/en/tasks/` - `./chat_templating` resolves within `tasks/` (doesn't exist); corrected to `../chat_templating` - Affected files: `tasks/image_text_to_text.md`\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44080",
-    "created_at": "2026-02-17T09:53:36Z",
-    "deletions": 8,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44115",
+    "created_at": "2026-02-17T21:32:55Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44080/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44080",
+    "files_url": "https://github.com/huggingface/transformers/pull/44115/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44115",
     "labels": [],
     "merged": true,
-    "number": 44080,
+    "number": 44115,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None`",
-    "updated_at": "2026-02-20T10:08:38Z"
+    "title": "[docs] fix broken chat_templating links in tasks docs",
+    "updated_at": "2026-02-23T16:27:57Z"
   },
   {
-    "additions": 19,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Set `input_modalities` on various architectures that aren't just text Sentence Transformers would like to rely on `input_modalities` in the future to determine what modalities can be used. However, it's not quite\u2026",
-    "changed_files": 10,
+    "additions": 716,
+    "author": "23atharvaS",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR migrates the `wav2vec2` family to the standardized output-capturing interface (`@capture_outputs` + `@can_return_tuple`) and includes follow-up compatibility fixes required to make full CI green. ## What changed ### Core\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44078",
-    "created_at": "2026-02-17T09:15:34Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44114",
+    "created_at": "2026-02-17T21:17:35Z",
+    "deletions": 1237,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44078/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44078",
+    "files_url": "https://github.com/huggingface/transformers/pull/44114/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44114",
     "labels": [],
-    "merged": true,
-    "number": 44078,
-    "review_comments_count": 6,
-    "state": "closed",
-    "title": "[`fix`] Set input_modalities on various architectures that aren't just text",
-    "updated_at": "2026-02-24T10:39:31Z"
+    "merged": false,
+    "number": 44114,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Migrate wav2vec2, wav2vec2_conformer, and wav2vec2_bert to standardized output collection decorators",
+    "updated_at": "2026-02-18T20:34:53Z"
   },
   {
-    "additions": 11,
-    "author": "mmahjoub5",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR refactors the ImageGPT implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
+    "additions": 5,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Updates the stale `test_device_override` in `test_processing_granite_speech.py` to verify that the device param controls where speech inputs are placed, r\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44076",
-    "created_at": "2026-02-17T08:46:55Z",
-    "deletions": 62,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44113",
+    "created_at": "2026-02-17T20:01:32Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44076/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44076",
+    "files_url": "https://github.com/huggingface/transformers/pull/44113/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44113",
     "labels": [],
-    "merged": false,
-    "number": 44076,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Refectored modeling_imagegpt.py to enable hooks to capture_outputs",
-    "updated_at": "2026-02-18T04:11:40Z"
+    "merged": true,
+    "number": 44113,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix(testing): Update stale device override test in GraniteSpeech",
+    "updated_at": "2026-02-19T11:24:29Z"
   },
   {
-    "additions": 66,
-    "author": "aman-coder03",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR migrates TextNet to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. It adds `_can_record_outputs`, applies `@capture_outputs` to `TextNetModel.for\u2026",
-    "changed_files": 2,
+    "additions": 30,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `PoolFormerLayer` to return a single tensor instead of a 1-tuple - Simplifies `\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44074",
-    "created_at": "2026-02-17T08:23:25Z",
-    "deletions": 52,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44111",
+    "created_at": "2026-02-17T19:38:02Z",
+    "deletions": 59,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44074/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44074",
+    "files_url": "https://github.com/huggingface/transformers/pull/44111/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44111",
     "labels": [],
     "merged": false,
-    "number": 44074,
+    "number": 44111,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[TextNet] Refactor output tracing using capture_outputs decorator",
-    "updated_at": "2026-02-17T11:28:11Z"
+    "state": "closed",
+    "title": "refactor(poolformer): use capture_outputs for output tracing",
+    "updated_at": "2026-02-18T21:19:22Z"
   },
   {
-    "additions": 32,
-    "author": "aman-coder03",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR migrates VisualBert to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. Specifically, this PR: - Adds `_can_record_outputs` to `VisualBertPreTraine\u2026",
+    "additions": 28,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `TvpAttention` to always return `(output, attention_probs)` (hooks decide what to capt\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44073",
-    "created_at": "2026-02-17T08:16:59Z",
-    "deletions": 38,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44110",
+    "created_at": "2026-02-17T19:32:55Z",
+    "deletions": 101,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44073/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44073",
+    "files_url": "https://github.com/huggingface/transformers/pull/44110/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44110",
     "labels": [],
     "merged": false,
-    "number": 44073,
+    "number": 44110,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[VisualBert] Refactor output tracing using capture_outputs and can_return_tuple decorators",
-    "updated_at": "2026-02-17T11:29:01Z"
+    "state": "closed",
+    "title": "refactor(tvp): use capture_outputs for output tracing",
+    "updated_at": "2026-02-18T21:19:24Z"
   },
   {
-    "additions": 12,
-    "author": "Siddhartha7340",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# Refactor efficientnet output tracing # What does this PR do? This Pull Request migrates the EfficientNet model to use the standardized @capture_outputs and @can_return_tuple decorators. - Added _can_record_outputs to `EfficientNetPreTrai\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44072",
-    "created_at": "2026-02-17T07:42:01Z",
-    "deletions": 38,
+    "additions": 48,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `HGNetV2Encoder` by removing `return_dict` parameter (always returns `BaseModelOutputWithNoAttention`)\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44109",
+    "created_at": "2026-02-17T19:23:03Z",
+    "deletions": 87,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44072/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44072",
+    "files_url": "https://github.com/huggingface/transformers/pull/44109/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44109",
     "labels": [],
     "merged": false,
-    "number": 44072,
+    "number": 44109,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor efficientnet output tracing with @capture_outputs and @can_r\u2026",
-    "updated_at": "2026-02-17T07:56:05Z"
+    "state": "closed",
+    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
+    "updated_at": "2026-02-18T21:19:25Z"
   },
   {
-    "additions": 38,
-    "author": "ArivunidhiA",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Refactors the MPT model to use the new standardized output collection interface as part of #43979. ### Changes: - Added `_can_record_outputs` to `MptPreTrainedModel` mapping `hidden_states` \u2192 `MptBlock` and `attent\u2026",
+    "additions": 33,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_attentions`/`return_dict` resolution - Adds `_can_record_outputs = {\"attentions\": VitDetAttention}`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44071",
-    "created_at": "2026-02-17T07:19:17Z",
-    "deletions": 112,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44108",
+    "created_at": "2026-02-17T19:15:00Z",
+    "deletions": 82,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44071/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44071",
+    "files_url": "https://github.com/huggingface/transformers/pull/44108/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44108",
     "labels": [],
     "merged": false,
-    "number": 44071,
+    "number": 44108,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[Refactor] Migrate MPT to standardized output tracing decorators",
-    "updated_at": "2026-02-17T07:20:17Z"
+    "state": "closed",
+    "title": "refactor(vitdet): use output tracing decorators",
+    "updated_at": "2026-02-18T21:19:27Z"
   },
   {
-    "additions": 272,
-    "author": "rudybear",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary - Add GGUF config mapping, defaults, and tokenizer converter for `qwen3_next` (Qwen3-Coder-Next, hybrid DeltaNet+Attention MoE, 80B total / 3B active) - Add `Qwen3NextTensorProcessor` handling DeltaNet-specific tensor transforms\u2026",
-    "changed_files": 3,
+    "additions": 40,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture_outputs` decorators - Simplifies `MraEncoder` to a plain loop returning a single tensor, removing `\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44070",
-    "created_at": "2026-02-17T07:18:13Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44107",
+    "created_at": "2026-02-17T19:04:42Z",
+    "deletions": 112,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44070/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44070",
+    "files_url": "https://github.com/huggingface/transformers/pull/44107/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44107",
     "labels": [],
     "merged": false,
-    "number": 44070,
+    "number": 44107,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add GGUF loading support for Qwen3-Next (qwen3_next) architecture",
-    "updated_at": "2026-02-17T07:21:26Z"
+    "state": "closed",
+    "title": "refactor(mra): use output tracing decorators",
+    "updated_at": "2026-02-18T21:19:29Z"
   },
   {
-    "additions": 26,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR uses ``torch.isfinite`` to simplify conditions, and the CUDA sync calls may also be reduced.",
-    "changed_files": 26,
+    "additions": 47,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 5 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44069",
-    "created_at": "2026-02-17T06:49:38Z",
-    "deletions": 48,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44106",
+    "created_at": "2026-02-17T18:59:25Z",
+    "deletions": 132,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44069/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44069",
+    "files_url": "https://github.com/huggingface/transformers/pull/44106/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44106",
     "labels": [],
-    "merged": true,
-    "number": 44069,
+    "merged": false,
+    "number": 44106,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Use torch.isfinite",
-    "updated_at": "2026-02-18T01:04:19Z"
+    "title": "Refactor yoso to use automatic output tracing",
+    "updated_at": "2026-02-18T21:19:30Z"
   },
   {
-    "additions": 42,
-    "author": "mtthw13",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Replaces manual `output_attentions`/`output_hidden_states`/`return_dict` boilerplate in GPT-Neo with the hook-based decorator system. **Changes:** - Added `_can_record_outputs = {\"hidden_states\": GPTNeoBlock, \"attentions\": GPTNeoAttention}\u2026",
-    "changed_files": 2,
+    "additions": 39,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 3 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44068",
-    "created_at": "2026-02-17T06:13:37Z",
-    "deletions": 119,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44105",
+    "created_at": "2026-02-17T18:54:40Z",
+    "deletions": 127,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44068/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44068",
+    "files_url": "https://github.com/huggingface/transformers/pull/44105/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44105",
     "labels": [],
     "merged": false,
-    "number": 44068,
+    "number": 44105,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor GPT-Neo to use `@capture_outputs` and `@can_return_tuple` decorators",
-    "updated_at": "2026-02-18T08:30:32Z"
+    "state": "closed",
+    "title": "Refactor lilt to use automatic output tracing",
+    "updated_at": "2026-02-18T21:19:32Z"
   },
   {
-    "additions": 63,
-    "author": "23atharvaS",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR introduces a new argument `eval_on_end` to the `Trainer` class. When enabled, the Trainer automatically runs evaluation at the end of training. This allows users to obtain final evaluation metrics without e\u2026",
-    "changed_files": 3,
+    "additions": 66,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 8 wrapper model classes, eliminating m\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44067",
-    "created_at": "2026-02-17T05:25:26Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44104",
+    "created_at": "2026-02-17T18:43:44Z",
+    "deletions": 207,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44067/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44067",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44104/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44104",
+    "labels": [],
     "merged": false,
-    "number": 44067,
+    "number": 44104,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add `eval_on_end` argument to Trainer for final evaluation after training",
-    "updated_at": "2026-02-17T13:32:34Z"
+    "title": "Refactor megatron_bert to use automatic output tracing",
+    "updated_at": "2026-02-18T21:19:34Z"
   },
   {
-    "additions": 35,
-    "author": "Jay-IIT",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Migrate GPT-J from manual boilerplate output collection to the new decorator-based output tracing system: - Add `_can_record_outputs` to `GPTJPreTrainedModel` - Add `@capture_outputs` and `@merge_with_config_defaults` to `GPTJModel.forward\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44066",
-    "created_at": "2026-02-17T05:12:11Z",
-    "deletions": 107,
+    "additions": 53,
+    "author": "engmohamedsalah",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44052 Now and then, the indexer ran into trouble switching between masks and cache. Most of the test failures came from these hiccups: - Indexer cache: the old if seq_len > 1: reset cache heuristic broke assisted decoding (multi-tok\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44103",
+    "created_at": "2026-02-17T18:04:48Z",
+    "deletions": 76,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44066/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44066",
+    "files_url": "https://github.com/huggingface/transformers/pull/44103/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44103",
     "labels": [],
     "merged": false,
-    "number": 44066,
+    "number": 44103,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor GPT-J to use standardized output tracing (#43979)",
-    "updated_at": "2026-02-18T18:44:28Z"
+    "state": "closed",
+    "title": "Fix glm_moe_dsa",
+    "updated_at": "2026-02-18T19:38:11Z"
   },
-  {
-    "additions": 21,
-    "author": "tysoncung",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Refactors the CTRL model to use the standardized output collection interface as part of #43979. ## Changes - Added `_can_record_outputs` to `CTRLPreTrainedModel` mapping `hidden_states` \u2192 `EncoderLayer` and `attentions` \u2192 `Multi\u2026",
+  {
+    "additions": 42,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of the meta-issue #43979. **Key changes:** - Added `_can_record_outputs = {\"hidden_states\": IBertLayer,\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44065",
-    "created_at": "2026-02-17T02:03:57Z",
-    "deletions": 76,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44102",
+    "created_at": "2026-02-17T17:21:32Z",
+    "deletions": 154,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44065/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44065",
+    "files_url": "https://github.com/huggingface/transformers/pull/44102/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44102",
     "labels": [],
     "merged": false,
-    "number": 44065,
+    "number": 44102,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor CTRL model output tracing with capture_outputs and can_return_tuple",
-    "updated_at": "2026-02-25T00:49:18Z"
+    "title": "Refactor ibert output tracing with capture_outputs",
+    "updated_at": "2026-02-18T21:19:35Z"
   },
   {
-    "additions": 57,
-    "author": "mariam851",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to trigger a final evaluation automatically after training finishes. Key Changes: TrainingArguments: Added eval_on_end boolean flag. Trainer.train: Logic to call evaluate() and merge metri\u2026",
+    "additions": 210,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR refactors XLM's output tracing to align with the standardized output capturing patterns used across the codebase. ### Key changes: - Refactors transformer blocks into a dedicated `XLMLayer` module to enable\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44064",
-    "created_at": "2026-02-17T01:10:31Z",
-    "deletions": 16,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44101",
+    "created_at": "2026-02-17T17:15:06Z",
+    "deletions": 194,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44064/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44064",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44101/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44101",
+    "labels": [],
     "merged": false,
-    "number": 44064,
+    "number": 44101,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "feat: implement eval_on_end to trigger evaluation after training",
-    "updated_at": "2026-02-17T13:32:40Z"
+    "state": "open",
+    "title": "[XLM] Refactor output tracing to align with capture_outputs standardized architecture",
+    "updated_at": "2026-02-19T08:08:33Z"
   },
   {
-    "additions": 229,
-    "author": "AutumnAurelium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This brings the Arcee AFMoE architecture in line with other MoE models' implementation patterns since v5. It also adds integration testing using Trinity Nano. ## Before submitting - [ ] This PR fixes a typo or impro\u2026",
-    "changed_files": 5,
+    "additions": 3,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "In https://github.com/huggingface/trl/pull/5112 a user reported that `trl sft --help` fails It's because three inherited args from `TrainingArguments` (`torch_empty_cache_steps`, `gradient_checkpointing` and `use_liger_kernel`)help strings\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44063",
-    "created_at": "2026-02-17T01:07:13Z",
-    "deletions": 150,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44100",
+    "created_at": "2026-02-17T17:10:36Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44063/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44063",
+    "files_url": "https://github.com/huggingface/transformers/pull/44100/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44100",
     "labels": [],
     "merged": true,
-    "number": 44063,
-    "review_comments_count": 6,
+    "number": 44100,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Update AFMoE architecture to use v5-style MoE impl",
-    "updated_at": "2026-03-19T14:00:46Z"
+    "title": "Fix percentage formatting in help messages for gradient checkpointing, Liger Kernel, and empty cache steps",
+    "updated_at": "2026-02-20T09:57:51Z"
   },
   {
     "additions": 2,
-    "author": "tarekziade",
+    "author": "qgallouedec",
     "author_association": "MEMBER",
-    "body_excerpt": "Reproduced locally with ``` pytest -q -m generate --random-order-bucket=none --flake-finder --flake-runs=200 tests/models/kosmos2/test_modeling_kosmos2.py -k test_assisted_decoding_matches_greedy_search ``` Root cause: - prepare_config_and\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44061",
-    "created_at": "2026-02-16T22:08:48Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44099",
+    "created_at": "2026-02-17T16:45:35Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44061/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44061",
+    "files_url": "https://github.com/huggingface/transformers/pull/44099/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44099",
     "labels": [],
     "merged": true,
-    "number": 44061,
+    "number": 44099,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: flaky `Kosmos2ModelTest` test",
-    "updated_at": "2026-02-18T14:23:30Z"
+    "title": "Bump dev version",
+    "updated_at": "2026-02-18T10:03:54Z"
   },
   {
-    "additions": 44,
-    "author": "lakprigan",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "## Summary Migrates GPT2 to the standardized output collection interface as part of #43979. - Added `_can_record_outputs` to `GPT2PreTrainedModel` (including `cross_attentions` via `OutputRecorder` targeting the `crossattention` submodule)\u2026",
-    "changed_files": 1,
+    "additions": 125,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR refactors ViLT's output handling to align with the standardized patterns used across the codebase. Key changes: - Removes manual `hidden_states`/`attentions` propagation and passes `output_attentions`, `out\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44059",
-    "created_at": "2026-02-16T20:14:30Z",
-    "deletions": 133,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44098",
+    "created_at": "2026-02-17T16:32:34Z",
+    "deletions": 138,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44059/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44059",
+    "files_url": "https://github.com/huggingface/transformers/pull/44098/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44098",
     "labels": [],
     "merged": false,
-    "number": 44059,
+    "number": 44098,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[GPT2] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
-    "updated_at": "2026-02-25T17:47:45Z"
+    "title": "[ViLT] Refactor output handling to align with standardized patterns",
+    "updated_at": "2026-02-17T16:37:46Z"
   },
   {
-    "additions": 122,
-    "author": "engmohamedsalah",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44052 \u2014 resolves 10 of 11 skipped tests for the `glm_moe_dsa` model. **Root causes fixed:** - **DSA indexer mask shape mismatch**: The attention mask was not properly normalized to 4D before being passed to the indexer an\u2026",
-    "changed_files": 3,
+    "additions": 12,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The `test_keep_in_fp32_modules` issues in #44052 are because the test assumes a model has **either** `_keep_in_fp32_modules` or `_keep_in_fp32_modules_strict` **but not both.** The only model that uses both is `glm_moe_dsa`, so this is the\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44058",
-    "created_at": "2026-02-16T19:24:30Z",
-    "deletions": 84,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44097",
+    "created_at": "2026-02-17T15:43:55Z",
+    "deletions": 42,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44058/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44058",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44058,
+    "files_url": "https://github.com/huggingface/transformers/pull/44097/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44097",
+    "labels": [],
+    "merged": true,
+    "number": 44097,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Merge test_keep_in_fp32_modules and test_keep_in_fp32_modules_strict",
+    "updated_at": "2026-02-17T16:23:33Z"
+  },
+  {
+    "additions": 3,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Tests were written (and pass) on DGX A100, here are the values for our runners.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44096",
+    "created_at": "2026-02-17T15:14:26Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44096/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44096",
+    "labels": [],
+    "merged": true,
+    "number": 44096,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix skipped tests for glm_moe_dsa model",
-    "updated_at": "2026-02-17T17:23:03Z"
+    "title": "[voxtral-realtime] update runner expected values ",
+    "updated_at": "2026-02-17T15:23:19Z"
   },
   {
-    "additions": 0,
-    "author": "mariam851",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR addresses memory efficiency issues in the Qwen2Moe implementation (reported in #43856). Users experienced Out-of-Memory (OOM) errors during quantization and inference, particularly with large reserved memory (e.g., 27GB on H100) th\u2026",
-    "changed_files": 0,
+    "additions": 43,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. The check that was added in https://github.com/huggingface/transformers/pull/43768 is wrong, as a missing weight would NOT be reinitialized in some cases! As for the pointers check, it is actually\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44095",
+    "created_at": "2026-02-17T14:33:22Z",
+    "deletions": 47,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44095/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44095",
+    "labels": [],
+    "merged": true,
+    "number": 44095,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix loading logic issue",
+    "updated_at": "2026-04-03T04:52:35Z"
+  },
+  {
+    "additions": 24,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44057",
-    "created_at": "2026-02-16T18:35:01Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44094",
+    "created_at": "2026-02-17T14:15:10Z",
+    "deletions": 70,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44057/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44057",
+    "files_url": "https://github.com/huggingface/transformers/pull/44094/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44094",
     "labels": [],
     "merged": false,
-    "number": 44057,
+    "number": 44094,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(qwen3_moe): optimize memory and fix OOM in MoE layers",
-    "updated_at": "2026-02-16T21:47:41Z"
+    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:07Z"
   },
   {
-    "additions": 50,
-    "author": "aman-coder03",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR migrates MPNet to the new standardized output tracing system using the `@capture_outputs` decorator. Specifically, this PR: - Applies `@capture_outputs` to `MPNetModel.forward` - Removes manual accumulation\u2026",
+    "additions": 28,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44056",
-    "created_at": "2026-02-16T18:27:05Z",
-    "deletions": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44093",
+    "created_at": "2026-02-17T14:15:07Z",
+    "deletions": 129,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44056/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44056",
+    "files_url": "https://github.com/huggingface/transformers/pull/44093/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44093",
     "labels": [],
     "merged": false,
-    "number": 44056,
+    "number": 44093,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[MPNet] Refactor output tracing using capture_outputs decorator",
-    "updated_at": "2026-02-17T11:23:12Z"
+    "state": "closed",
+    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:09Z"
   },
   {
-    "additions": 5,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 1,
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44055",
-    "created_at": "2026-02-16T18:26:43Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44092",
+    "created_at": "2026-02-17T14:15:04Z",
+    "deletions": 159,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44055/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44055",
+    "files_url": "https://github.com/huggingface/transformers/pull/44092/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44092",
     "labels": [],
-    "merged": true,
-    "number": 44055,
+    "merged": false,
+    "number": 44092,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix unprotected torch import",
-    "updated_at": "2026-02-16T18:43:01Z"
+    "title": "Refactor output tracing for swinv2 model",
+    "updated_at": "2026-03-03T00:30:10Z"
   },
   {
-    "additions": 346,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add flash MLA interface. - It does not work I get a segfault - we don't leverage the paged cache so it's not as efficient as that I reckon. ```bash Fetching 6 files: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2026",
-    "changed_files": 10,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44054",
-    "created_at": "2026-02-16T18:07:14Z",
-    "deletions": 93,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44054/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44054",
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44091",
+    "created_at": "2026-02-17T14:14:56Z",
+    "deletions": 146,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44091/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44091",
     "labels": [],
     "merged": false,
-    "number": 44054,
+    "number": 44091,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Flash mla interface",
-    "updated_at": "2026-02-20T11:14:39Z"
+    "state": "closed",
+    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:11Z"
   },
   {
-    "additions": 2,
-    "author": "Cyrilvallez",
+    "additions": 25,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 1,
+    "body_excerpt": "bos and eos behaviour should match when updating post processor setting `add_bos_token=True` when `bos_token=None` should silently disables `add_bos_token`. (was already the behavior for `eos_token`)",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44053",
-    "created_at": "2026-02-16T17:59:48Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44090",
+    "created_at": "2026-02-17T13:15:07Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44053/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44053",
+    "files_url": "https://github.com/huggingface/transformers/pull/44090/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44090",
     "labels": [],
     "merged": true,
-    "number": 44053,
+    "number": 44090,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix peft conversion typo",
-    "updated_at": "2026-02-17T11:12:19Z"
+    "title": "Update post proc",
+    "updated_at": "2026-02-18T15:34:18Z"
   },
   {
-    "additions": 2,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Fix incorrect forward type hint for Gemma3n ## Details The type hint didn't match the actual returned class: https://github.com/huggingface/transformers/blob/349e00c1a367ce263624e525038250625dcf20c7/src/transforme\u2026",
+    "additions": 113,
+    "author": "preetam1407",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #43979 ## Summary Refactor T5 to the standardized output tracing interface. ## Changes - Added `_can_record_outputs` on T5 encoder/decoder stack subclasses. - Added `@capture_outputs` on the base stack forward. - Added `@can_return_t\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44051",
-    "created_at": "2026-02-16T17:26:24Z",
-    "deletions": 2,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44089",
+    "created_at": "2026-02-17T11:37:18Z",
+    "deletions": 294,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44051/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44051",
+    "files_url": "https://github.com/huggingface/transformers/pull/44089/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44089",
     "labels": [],
-    "merged": true,
-    "number": 44051,
+    "merged": false,
+    "number": 44089,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`chore`] Fix incorrect forward type hint for Gemma3n",
-    "updated_at": "2026-02-20T09:08:07Z"
+    "title": "Refactor t5 output tracing",
+    "updated_at": "2026-02-17T13:45:23Z"
   },
   {
-    "additions": 15,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Patch `get_text_features` for ChineseCLIP ### Details The `get_text_features` assumes that the `text_model` returns a `BaseModelOutputWithPooling`, just like is done with many other models. Currently, the `get_tex\u2026",
-    "changed_files": 7,
+    "additions": 41,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description Refactors GPT-2 model to use the standardized `@capture_outputs` and `@can_return_tuple` decorators, replacing manual output collection boilerplate. Part of #43979 ## Changes - **`GPT2PreTrainedModel`**: Added `_can_record_o\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44050",
-    "created_at": "2026-02-16T17:23:31Z",
-    "deletions": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44088",
+    "created_at": "2026-02-17T11:32:42Z",
+    "deletions": 129,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44050/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44050",
+    "files_url": "https://github.com/huggingface/transformers/pull/44088/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44088",
     "labels": [],
     "merged": false,
-    "number": 44050,
-    "review_comments_count": 4,
+    "number": 44088,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[`fix`] Patch `get_text_features` for ChineseCLIP",
-    "updated_at": "2026-02-17T09:55:17Z"
+    "title": "Refactor GPT-2 output tracing with capture_outputs/can_return_tuple",
+    "updated_at": "2026-02-17T11:41:32Z"
   },
   {
-    "additions": 59,
-    "author": "ManasVardhan",
+    "additions": 16,
+    "author": "huyxdang",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `fnet` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of #43979. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": FNetLayer}` to `FNetPreTrainedModel`\u2026",
-    "changed_files": 1,
+    "body_excerpt": "### Summary Refactors the Mamba2 model to use the standardized output collection interface as part of #43979. ### Changes * **Standardized Output Mapping**: Added `_can_record_outputs` to `Mamba2PreTrainedModel` mapping `hidden_states` \u2192 `\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44049",
-    "created_at": "2026-02-16T17:19:04Z",
-    "deletions": 112,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44087",
+    "created_at": "2026-02-17T11:30:25Z",
+    "deletions": 33,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44049/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44049",
+    "files_url": "https://github.com/huggingface/transformers/pull/44087/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44087",
     "labels": [],
     "merged": false,
-    "number": 44049,
+    "number": 44087,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor fnet model to use @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:13Z"
+    "title": "Refactor Mamba2 to use standardized output tracing",
+    "updated_at": "2026-03-11T02:08:22Z"
   },
   {
-    "additions": 4,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Fix up `__repr__` whitespace/brackets ## Reproducer ```python from transformers import AutoTokenizer, PreTrainedTokenizerBase # __repr__ via PreTrainedTokenizerBase tokenizer = AutoTokenizer.from_pretrained(\"bert-\u2026",
-    "changed_files": 2,
+    "additions": 16,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Migrates **MGP-STR** to the standardized output collection interface using `@capture_outputs` and `@can_return_tuple` decorators. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": MgpstrLayer, \"attentio\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44048",
-    "created_at": "2026-02-16T17:18:10Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44086",
+    "created_at": "2026-02-17T11:21:22Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44048/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44048",
+    "files_url": "https://github.com/huggingface/transformers/pull/44086/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44086",
     "labels": [],
-    "merged": true,
-    "number": 44048,
+    "merged": false,
+    "number": 44086,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[`simple`] Fix up `__repr__` whitespace/brackets",
-    "updated_at": "2026-02-20T10:03:34Z"
+    "state": "open",
+    "title": "[MGP-STR] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
+    "updated_at": "2026-02-17T11:22:25Z"
   },
   {
-    "additions": 35,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `bloom` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of the effort in #43979. ### Changes: - Add `_can_record_outputs` dict to `BloomPreTrainedModel` mapping `hi\u2026",
+    "additions": 37,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Refactors the RemBERT model to use the new output tracing decorators (`@capture_outputs` and `@can_return_tuple`), replacing manual output collection boilerplate. ### Changes: - Added `@capture_outputs` decorator t\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44047",
-    "created_at": "2026-02-16T17:15:25Z",
-    "deletions": 104,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44085",
+    "created_at": "2026-02-17T11:09:55Z",
+    "deletions": 108,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44047/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44047",
+    "files_url": "https://github.com/huggingface/transformers/pull/44085/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44085",
     "labels": [],
     "merged": false,
-    "number": 44047,
+    "number": 44085,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor bloom model to use @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:14Z"
+    "state": "open",
+    "title": "Refactor RemBERT to use output tracing decorators",
+    "updated_at": "2026-02-17T11:10:59Z"
   },
   {
-    "additions": 24,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
+    "additions": 37,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Refactors the GPT-J model to use the new `capture_outputs` and `can_return_tuple` decorators for output tracing, following the same pattern as #44046 (CodeGen). ### Changes: - Added `@capture_outputs` decorator on\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44046",
-    "created_at": "2026-02-16T17:07:38Z",
-    "deletions": 70,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44084",
+    "created_at": "2026-02-17T11:08:48Z",
+    "deletions": 108,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44046/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44046",
+    "files_url": "https://github.com/huggingface/transformers/pull/44084/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44084",
     "labels": [],
     "merged": false,
-    "number": 44046,
+    "number": 44084,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-02-17T14:15:23Z"
+    "title": "[GPT-J] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
+    "updated_at": "2026-02-17T11:41:38Z"
   },
   {
-    "additions": 456215,
-    "author": "ArthurZucker",
+    "additions": 1555,
+    "author": "3outeille",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 4939,
+    "body_excerpt": "- TODO: - fsdp => faire comme tp en mode fsdp_plan manual qui devient l'auto par d\u00e9faut --- This PR introduces **first-class FSDP2 (Fully Sharded Data Parallel v2) support** directly in Transformers, bypassing the need for Accelerate's FSD\u2026",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44045",
-    "created_at": "2026-02-16T17:01:41Z",
-    "deletions": 591028,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44045/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44045",
-    "labels": [],
-    "merged": false,
-    "number": 44045,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Flash-mla-interface",
-    "updated_at": "2026-02-16T17:11:51Z"
-  },
-  {
-    "additions": 49,
-    "author": "rwtarpit",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44044",
-    "created_at": "2026-02-16T16:43:19Z",
-    "deletions": 112,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44083",
+    "created_at": "2026-02-17T10:57:06Z",
+    "deletions": 120,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44044/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44044",
+    "files_url": "https://github.com/huggingface/transformers/pull/44083/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44083",
     "labels": [],
     "merged": false,
-    "number": 44044,
-    "review_comments_count": 0,
+    "number": 44083,
+    "review_comments_count": 24,
     "state": "open",
-    "title": "Refactor DeBERTa's output tracing interface",
-    "updated_at": "2026-02-16T18:57:29Z"
+    "title": "FSDP2 native support in transformers ",
+    "updated_at": "2026-04-14T13:58:30Z"
   },
   {
-    "additions": 170,
-    "author": "IlyasMoutawwakil",
+    "additions": 6,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 31,
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44077. Indeed, the call is not optional. This is slightly breaking as the defaut used to be False, so fresh model instantiation will now use a different init\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44043",
-    "created_at": "2026-02-16T16:23:57Z",
-    "deletions": 162,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44082",
+    "created_at": "2026-02-17T10:09:03Z",
+    "deletions": 20,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44043/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44043",
+    "files_url": "https://github.com/huggingface/transformers/pull/44082/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44082",
     "labels": [],
     "merged": true,
-    "number": 44043,
-    "review_comments_count": 15,
+    "number": 44082,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "`grouped_mm` fallback",
-    "updated_at": "2026-02-23T13:58:09Z"
+    "title": "Fix patchtsmixer call to post_init",
+    "updated_at": "2026-02-17T11:05:40Z"
   },
   {
-    "additions": 1,
-    "author": "Rocketknight1",
+    "additions": 48,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "Joao is regrettably no longer with us :saluting_face: so we should really stop getting users to ping him! This PR makes @cyrilvallez responsible for `generate` issues outside of VLMs.",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Fixes #42533 by introducing default flash implementations. cc @vasqu and @cyrilvallez",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44042",
-    "created_at": "2026-02-16T16:00:36Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44081",
+    "created_at": "2026-02-17T09:54:01Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44042/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44042",
+    "files_url": "https://github.com/huggingface/transformers/pull/44081/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44081",
     "labels": [],
     "merged": true,
-    "number": 44042,
-    "review_comments_count": 0,
+    "number": 44081,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Update assignee for generate in bug report template",
-    "updated_at": "2026-02-16T16:09:19Z"
+    "title": "add default flash impl",
+    "updated_at": "2026-02-19T11:29:54Z"
   },
   {
-    "additions": 469,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? Alternate PR to #43985 to be a reorder only PR. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, s\u2026",
-    "changed_files": 4,
+    "additions": 22,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None` Fixes #44079, follow-up from #44050. Essentially, it brings behaviour to the expected as described in #44079: > If I 1) initialize a\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44041",
-    "created_at": "2026-02-16T15:40:41Z",
-    "deletions": 457,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44080",
+    "created_at": "2026-02-17T09:53:36Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44041/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44041",
+    "files_url": "https://github.com/huggingface/transformers/pull/44080/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44080",
     "labels": [],
     "merged": true,
-    "number": 44041,
-    "review_comments_count": 14,
+    "number": 44080,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor _inner_training_loop to smaller methods",
-    "updated_at": "2026-02-23T16:52:09Z"
+    "title": "Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None`",
+    "updated_at": "2026-02-20T10:08:38Z"
   },
   {
-    "additions": 366,
-    "author": "zucchini-nlp",
+    "additions": 19,
+    "author": "tomaarsen",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44008 and re-enables tests",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? * Set `input_modalities` on various architectures that aren't just text Sentence Transformers would like to rely on `input_modalities` in the future to determine what modalities can be used. However, it's not quite\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44040",
-    "created_at": "2026-02-16T12:43:28Z",
-    "deletions": 230,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44078",
+    "created_at": "2026-02-17T09:15:34Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44040/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44040",
+    "files_url": "https://github.com/huggingface/transformers/pull/44078/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44078",
     "labels": [],
     "merged": true,
-    "number": 44040,
-    "review_comments_count": 14,
+    "number": 44078,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Fix gemma3n `get_audio_features`",
-    "updated_at": "2026-02-19T12:50:00Z"
+    "title": "[`fix`] Set input_modalities on various architectures that aren't just text",
+    "updated_at": "2026-02-24T10:39:31Z"
   },
   {
-    "additions": 47,
-    "author": "itzyesse99-lgtm",
-    "author_association": "NONE",
-    "body_excerpt": "```diff diff --git a/transformers/modeling_utils.py b/transformers/modeling_utils.py index 1234567..8901234 100644 --- a/transformers/modeling_utils.py +++ b/transformers/modeling_utils.py @@ -10,6 +10,7 @@ from transformers import PreTrai\u2026",
+    "additions": 11,
+    "author": "mmahjoub5",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR refactors the ImageGPT implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44039",
-    "created_at": "2026-02-16T12:01:26Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44076",
+    "created_at": "2026-02-17T08:46:55Z",
+    "deletions": 62,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44039/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44039",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44076/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44076",
+    "labels": [],
     "merged": false,
-    "number": 44039,
+    "number": 44076,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "AI Fix for #43979",
-    "updated_at": "2026-03-14T12:34:32Z"
+    "state": "open",
+    "title": "Refectored modeling_imagegpt.py to enable hooks to capture_outputs",
+    "updated_at": "2026-02-18T04:11:40Z"
   },
   {
-    "additions": 23,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/43913",
-    "changed_files": 3,
+    "additions": 66,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR migrates TextNet to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. It adds `_can_record_outputs`, applies `@capture_outputs` to `TextNetModel.for\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44037",
-    "created_at": "2026-02-16T11:02:12Z",
-    "deletions": 10,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44074",
+    "created_at": "2026-02-17T08:23:25Z",
+    "deletions": 52,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44037/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44037",
+    "files_url": "https://github.com/huggingface/transformers/pull/44074/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44074",
     "labels": [],
-    "merged": true,
-    "number": 44037,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "Add a dim check mechanism in Transpose and fix qwen3_vl_moe weight mapping",
-    "updated_at": "2026-02-16T16:01:12Z"
+    "merged": false,
+    "number": 44074,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[TextNet] Refactor output tracing using capture_outputs decorator",
+    "updated_at": "2026-02-17T11:28:11Z"
   },
   {
-    "additions": 0,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? So the following logic added in a previous PR #44033 could take effect ```python # `include_all` is `True` when the CI is running on a pull request, so it treats all failing tests # in the current CI run as \"new fai\u2026",
+    "additions": 32,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR migrates VisualBert to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. Specifically, this PR: - Adds `_can_record_outputs` to `VisualBertPreTraine\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44036",
-    "created_at": "2026-02-16T10:14:54Z",
-    "deletions": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44073",
+    "created_at": "2026-02-17T08:16:59Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44036/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44036",
+    "files_url": "https://github.com/huggingface/transformers/pull/44073/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44073",
     "labels": [],
-    "merged": true,
-    "number": 44036,
+    "merged": false,
+    "number": 44073,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Remove `other_workflow_run_ids` for `issue_comment` in `utils/notification_service.py`",
-    "updated_at": "2026-02-16T10:24:07Z"
+    "state": "open",
+    "title": "[VisualBert] Refactor output tracing using capture_outputs and can_return_tuple decorators",
+    "updated_at": "2026-02-17T11:29:01Z"
   },
   {
-    "additions": 25,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? We already brought it back with: ```python if clean_up_tokenization_spaces: # Call custom cleanup method if it exists (e.g., for CLVP's [SPACE] token replacement) if hasattr(self, \"clean_up_tokenization\") and callab\u2026",
-    "changed_files": 7,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44035",
-    "created_at": "2026-02-16T09:49:28Z",
-    "deletions": 112,
+    "additions": 12,
+    "author": "Siddhartha7340",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# Refactor efficientnet output tracing # What does this PR do? This Pull Request migrates the EfficientNet model to use the standardized @capture_outputs and @can_return_tuple decorators. - Added _can_record_outputs to `EfficientNetPreTrai\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44072",
+    "created_at": "2026-02-17T07:42:01Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44035/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44035",
+    "files_url": "https://github.com/huggingface/transformers/pull/44072/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44072",
     "labels": [],
-    "merged": true,
-    "number": 44035,
+    "merged": false,
+    "number": 44072,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "bring back our demons: clean_up_tokenization_spaces",
-    "updated_at": "2026-02-20T14:50:18Z"
+    "state": "open",
+    "title": "refactor efficientnet output tracing with @capture_outputs and @can_r\u2026",
+    "updated_at": "2026-02-17T07:56:05Z"
   },
   {
-    "additions": 18,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 38,
+    "author": "ArivunidhiA",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Refactors the MPT model to use the new standardized output collection interface as part of #43979. ### Changes: - Added `_can_record_outputs` to `MptPreTrainedModel` mapping `hidden_states` \u2192 `MptBlock` and `attent\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44034",
-    "created_at": "2026-02-16T08:04:20Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44071",
+    "created_at": "2026-02-17T07:19:17Z",
+    "deletions": 112,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44034/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44034",
+    "files_url": "https://github.com/huggingface/transformers/pull/44071/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44071",
     "labels": [],
     "merged": false,
-    "number": 44034,
+    "number": 44071,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "don't merge check workflow",
-    "updated_at": "2026-02-16T10:52:50Z"
+    "state": "open",
+    "title": "[Refactor] Migrate MPT to standardized output tracing decorators",
+    "updated_at": "2026-02-17T07:20:17Z"
   },
   {
-    "additions": 143,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Improve new failing test analysis for PR comment CI",
+    "additions": 272,
+    "author": "rudybear",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary - Add GGUF config mapping, defaults, and tokenizer converter for `qwen3_next` (Qwen3-Coder-Next, hybrid DeltaNet+Attention MoE, 80B total / 3B active) - Add `Qwen3NextTensorProcessor` handling DeltaNet-specific tensor transforms\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44033",
-    "created_at": "2026-02-16T07:30:33Z",
-    "deletions": 49,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44070",
+    "created_at": "2026-02-17T07:18:13Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44033/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44033",
+    "files_url": "https://github.com/huggingface/transformers/pull/44070/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44070",
     "labels": [],
-    "merged": true,
-    "number": 44033,
+    "merged": false,
+    "number": 44070,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Improve new failing test analysis for PR comment CI",
-    "updated_at": "2026-02-16T08:02:16Z"
+    "state": "open",
+    "title": "Add GGUF loading support for Qwen3-Next (qwen3_next) architecture",
+    "updated_at": "2026-02-17T07:21:26Z"
   },
   {
-    "additions": 3,
-    "author": "JJJYmmm",
+    "additions": 26,
+    "author": "cyyever",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Update FP8 expert replacement to use `model.config.text_config` when available (VLMs), falling back to model.config if it's text-only models.",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This PR uses ``torch.isfinite`` to simplify conditions, and the CUDA sync calls may also be reduced.",
+    "changed_files": 26,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44032",
-    "created_at": "2026-02-16T06:02:28Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44069",
+    "created_at": "2026-02-17T06:49:38Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44032/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44032",
+    "files_url": "https://github.com/huggingface/transformers/pull/44069/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44069",
     "labels": [],
     "merged": true,
-    "number": 44032,
-    "review_comments_count": 2,
+    "number": 44069,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Misc][vlms] Use text_config when initializing the fine-grained FP8Expert",
-    "updated_at": "2026-02-19T10:28:31Z"
+    "title": "Use torch.isfinite",
+    "updated_at": "2026-02-18T01:04:19Z"
   },
   {
-    "additions": 11,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`dpr` model as per #43979 cc @molbap <img width=\"853\" height=\"323\" alt=\"Screenshot 2026-02-16 at 9 13 30 AM\" src=\"https://github.com/user-attachments/assets/d658f1d0-75e8-4eac-8a12-9aeddf194dde\" />",
-    "changed_files": 1,
+    "additions": 42,
+    "author": "mtthw13",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Replaces manual `output_attentions`/`output_hidden_states`/`return_dict` boilerplate in GPT-Neo with the hook-based decorator system. **Changes:** - Added `_can_record_outputs = {\"hidden_states\": GPTNeoBlock, \"attentions\": GPTNeoAttention}\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44030",
-    "created_at": "2026-02-16T03:44:19Z",
-    "deletions": 58,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44068",
+    "created_at": "2026-02-17T06:13:37Z",
+    "deletions": 119,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44030/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44030",
+    "files_url": "https://github.com/huggingface/transformers/pull/44068/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44068",
     "labels": [],
     "merged": false,
-    "number": 44030,
-    "review_comments_count": 1,
+    "number": 44068,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "refactor output tracing in `dpr`",
-    "updated_at": "2026-02-17T07:46:00Z"
+    "title": "Refactor GPT-Neo to use `@capture_outputs` and `@can_return_tuple` decorators",
+    "updated_at": "2026-02-18T08:30:32Z"
   },
   {
-    "additions": 21,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`rwkv` model as per #43979 cc @molbap <img width=\"856\" height=\"333\" alt=\"Screenshot 2026-02-16 at 9 06 34 AM\" src=\"https://github.com/user-attachments/assets/9c8c5d41-ffbd-45f6-8b9b-1429bcb14543\" />",
-    "changed_files": 1,
+    "additions": 63,
+    "author": "23atharvaS",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR introduces a new argument `eval_on_end` to the `Trainer` class. When enabled, the Trainer automatically runs evaluation at the end of training. This allows users to obtain final evaluation metrics without e\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44029",
-    "created_at": "2026-02-16T03:37:13Z",
-    "deletions": 55,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44067",
+    "created_at": "2026-02-17T05:25:26Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44029/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44029",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44067/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44067",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44029,
+    "number": 44067,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `rwkv`",
-    "updated_at": "2026-02-17T07:47:02Z"
+    "state": "closed",
+    "title": "Add `eval_on_end` argument to Trainer for final evaluation after training",
+    "updated_at": "2026-02-17T13:32:34Z"
   },
   {
-    "additions": 13,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`superpoint` model as per #43979 cc @molbap <img width=\"857\" height=\"334\" alt=\"Screenshot 2026-02-16 at 8 53 43 AM\" src=\"https://github.com/user-attachments/assets/17781b76-743b-4b38-923a-8db3b94ccd01\" />",
+    "additions": 35,
+    "author": "Jay-IIT",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Migrate GPT-J from manual boilerplate output collection to the new decorator-based output tracing system: - Add `_can_record_outputs` to `GPTJPreTrainedModel` - Add `@capture_outputs` and `@merge_with_config_defaults` to `GPTJModel.forward\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44028",
-    "created_at": "2026-02-16T03:25:14Z",
-    "deletions": 46,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44066",
+    "created_at": "2026-02-17T05:12:11Z",
+    "deletions": 107,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44028/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44028",
+    "files_url": "https://github.com/huggingface/transformers/pull/44066/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44066",
     "labels": [],
     "merged": false,
-    "number": 44028,
+    "number": 44066,
     "review_comments_count": 0,
     "state": "open",
-    "title": "refactor output tracing for `superpoint`",
-    "updated_at": "2026-02-17T07:46:06Z"
+    "title": "Refactor GPT-J to use standardized output tracing (#43979)",
+    "updated_at": "2026-02-18T18:44:28Z"
   },
   {
-    "additions": 6,
-    "author": "omkar-334",
+    "additions": 21,
+    "author": "tysoncung",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `speech_encoder_decoder` model as per #43979 cc @molbap <img width=\"852\" height=\"335\" alt=\"Screenshot 2026-02-16 at 8 44 05 AM\" src=\"https://github.com/user-attachments/assets/ee25c72b-b995-403c-b47b-3e9cbae0d2cc\" />",
+    "body_excerpt": "## Summary Refactors the CTRL model to use the standardized output collection interface as part of #43979. ## Changes - Added `_can_record_outputs` to `CTRLPreTrainedModel` mapping `hidden_states` \u2192 `EncoderLayer` and `attentions` \u2192 `Multi\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44027",
-    "created_at": "2026-02-16T03:14:41Z",
-    "deletions": 22,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44065",
+    "created_at": "2026-02-17T02:03:57Z",
+    "deletions": 76,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44027/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44027",
+    "files_url": "https://github.com/huggingface/transformers/pull/44065/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44065",
     "labels": [],
     "merged": false,
-    "number": 44027,
+    "number": 44065,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `speech_encoder_decoder`",
-    "updated_at": "2026-02-17T09:04:35Z"
+    "state": "closed",
+    "title": "Refactor CTRL model output tracing with capture_outputs and can_return_tuple",
+    "updated_at": "2026-02-25T00:49:18Z"
   },
   {
-    "additions": 12,
-    "author": "omkar-334",
+    "additions": 57,
+    "author": "mariam851",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`vision_encoder_decoder` model as per #43979 cc @molbap <img width=\"849\" height=\"333\" alt=\"Screenshot 2026-02-16 at 8 28 20 AM\" src=\"https://github.com/user-attachments/assets/9f511a17-947b-46ed-82a8-8bb9bb103f15\" />",
-    "changed_files": 1,
+    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to trigger a final evaluation automatically after training finishes. Key Changes: TrainingArguments: Added eval_on_end boolean flag. Trainer.train: Logic to call evaluate() and merge metri\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44026",
-    "created_at": "2026-02-16T02:59:14Z",
-    "deletions": 22,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44064",
+    "created_at": "2026-02-17T01:10:31Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44026/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44026",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44064/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44064",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44026,
+    "number": 44064,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing for `vision_encoder_decoder`",
-    "updated_at": "2026-02-17T09:05:22Z"
+    "state": "closed",
+    "title": "feat: implement eval_on_end to trigger evaluation after training",
+    "updated_at": "2026-02-17T13:32:40Z"
   },
   {
-    "additions": 7,
-    "author": "omkar-334",
+    "additions": 229,
+    "author": "AutumnAurelium",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `depth_anything` model as per #43979 cc @molbap <img width=\"840\" height=\"330\" alt=\"Screenshot 2026-02-16 at 8 25 01 AM\" src=\"https://github.com/user-attachments/assets/fe7770be-70cb-4343-accb-7407c6bbb4f8\" />",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? This brings the Arcee AFMoE architecture in line with other MoE models' implementation patterns since v5. It also adds integration testing using Trinity Nano. ## Before submitting - [ ] This PR fixes a typo or impro\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44025",
-    "created_at": "2026-02-16T02:56:17Z",
-    "deletions": 23,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44063",
+    "created_at": "2026-02-17T01:07:13Z",
+    "deletions": 150,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44025/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44025",
+    "files_url": "https://github.com/huggingface/transformers/pull/44063/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44063",
     "labels": [],
-    "merged": false,
-    "number": 44025,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing for `depth_anything`",
-    "updated_at": "2026-02-17T07:46:31Z"
+    "merged": true,
+    "number": 44063,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "Update AFMoE architecture to use v5-style MoE impl",
+    "updated_at": "2026-03-19T14:00:46Z"
   },
   {
-    "additions": 15,
-    "author": "mmahjoub5",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR refactors the FocalNet implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
+    "additions": 2,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "Reproduced locally with ``` pytest -q -m generate --random-order-bucket=none --flake-finder --flake-runs=200 tests/models/kosmos2/test_modeling_kosmos2.py -k test_assisted_decoding_matches_greedy_search ``` Root cause: - prepare_config_and\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44024",
-    "created_at": "2026-02-15T23:48:12Z",
-    "deletions": 60,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44061",
+    "created_at": "2026-02-16T22:08:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44024/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44024",
+    "files_url": "https://github.com/huggingface/transformers/pull/44061/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44061",
     "labels": [],
-    "merged": false,
-    "number": 44024,
+    "merged": true,
+    "number": 44061,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Focalnet standardized outputs",
-    "updated_at": "2026-02-17T08:47:48Z"
+    "state": "closed",
+    "title": "Fix: flaky `Kosmos2ModelTest` test",
+    "updated_at": "2026-02-18T14:23:30Z"
   },
   {
-    "additions": 32,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the Nystromformer model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Add `_can_record_outputs` on `Nystromform\u2026",
+    "additions": 44,
+    "author": "lakprigan",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "## Summary Migrates GPT2 to the standardized output collection interface as part of #43979. - Added `_can_record_outputs` to `GPT2PreTrainedModel` (including `cross_attentions` via `OutputRecorder` targeting the `crossattention` submodule)\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44023",
-    "created_at": "2026-02-15T21:53:48Z",
-    "deletions": 122,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44059",
+    "created_at": "2026-02-16T20:14:30Z",
+    "deletions": 133,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44023/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44023",
+    "files_url": "https://github.com/huggingface/transformers/pull/44059/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44059",
     "labels": [],
     "merged": false,
-    "number": 44023,
+    "number": 44059,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor Nystromformer output tracing with @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:15Z"
+    "state": "open",
+    "title": "[GPT2] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
+    "updated_at": "2026-02-25T17:47:45Z"
   },
   {
-    "additions": 57,
-    "author": "ManasVardhan",
+    "additions": 122,
+    "author": "engmohamedsalah",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the ConvBERT model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Move `ConvBertPreTrainedModel` after layer def\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
+    "body_excerpt": "## Summary Fixes #44052 \u2014 resolves 10 of 11 skipped tests for the `glm_moe_dsa` model. **Root causes fixed:** - **DSA indexer mask shape mismatch**: The attention mask was not properly normalized to 4D before being passed to the indexer an\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44022",
-    "created_at": "2026-02-15T21:49:57Z",
-    "deletions": 152,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44058",
+    "created_at": "2026-02-16T19:24:30Z",
+    "deletions": 84,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44022/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44022",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44058/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44058",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44022,
+    "number": 44058,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor ConvBERT output tracing with @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:17Z"
+    "title": "Fix skipped tests for glm_moe_dsa model",
+    "updated_at": "2026-02-17T17:23:03Z"
   },
   {
-    "additions": 22,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #43906 (related to #38071) ### Problem When using `pipeline('text-generation')` with batched inference on Qwen3 (and other models where `pad_token_id == bos_token_id`), a spurious warning is emitted: > A deco\u2026",
-    "changed_files": 3,
+    "additions": 0,
+    "author": "mariam851",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR addresses memory efficiency issues in the Qwen2Moe implementation (reported in #43856). Users experienced Out-of-Memory (OOM) errors during quantization and inference, particularly with large reserved memory (e.g., 27GB on H100) th\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44021",
-    "created_at": "2026-02-15T21:45:58Z",
-    "deletions": 13,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44057",
+    "created_at": "2026-02-16T18:35:01Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44021/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44021",
+    "files_url": "https://github.com/huggingface/transformers/pull/44057/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44057",
     "labels": [],
-    "merged": true,
-    "number": 44021,
+    "merged": false,
+    "number": 44057,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix false positive right-padding warning for decoder-only models in pipeline",
-    "updated_at": "2026-02-17T10:41:32Z"
+    "title": "fix(qwen3_moe): optimize memory and fix OOM in MoE layers",
+    "updated_at": "2026-02-16T21:47:41Z"
   },
   {
-    "additions": 28,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
+    "additions": 50,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR migrates MPNet to the new standardized output tracing system using the `@capture_outputs` decorator. Specifically, this PR: - Applies `@capture_outputs` to `MPNetModel.forward` - Removes manual accumulation\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44020",
-    "created_at": "2026-02-15T21:39:17Z",
-    "deletions": 129,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44056",
+    "created_at": "2026-02-16T18:27:05Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44020/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44020",
+    "files_url": "https://github.com/huggingface/transformers/pull/44056/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44056",
     "labels": [],
     "merged": false,
-    "number": 44020,
+    "number": 44056,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-02-17T14:15:21Z"
+    "state": "open",
+    "title": "[MPNet] Refactor output tracing using capture_outputs decorator",
+    "updated_at": "2026-02-17T11:23:12Z"
   },
   {
-    "additions": 17,
-    "author": "Sid-V5",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Refactored the `resnet` model to use the standardized output tracing decorators (`@capture_outputs` and `@can_return_tuple`) as part of the migration ### Changes | File | Change | |------|--------| | `modeling_resnet.py` | Migrated to `@ca\u2026",
+    "additions": 5,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44019",
-    "created_at": "2026-02-15T19:53:19Z",
-    "deletions": 62,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44055",
+    "created_at": "2026-02-16T18:26:43Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44019/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44019",
+    "files_url": "https://github.com/huggingface/transformers/pull/44055/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44055",
     "labels": [],
-    "merged": false,
-    "number": 44019,
+    "merged": true,
+    "number": 44055,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor `resnet` to use `@capture_outputs` / `@can_return_tuple` output tracing",
-    "updated_at": "2026-02-15T20:01:23Z"
+    "state": "closed",
+    "title": "Fix unprotected torch import",
+    "updated_at": "2026-02-16T18:43:01Z"
   },
   {
-    "additions": 41,
-    "author": "yashbora9",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "## Summary - Migrates `gpt_neo` to the standardized output collection interface as part of #43979 - Adds `@capture_outputs` decorator on `GPTNeoModel.forward` (base model) - Adds `@can_return_tuple` decorator on all wrapper model forwards\u2026",
-    "changed_files": 2,
+    "additions": 346,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add flash MLA interface. - It does not work I get a segfault - we don't leverage the paged cache so it's not as efficient as that I reckon. ```bash Fetching 6 files: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44018",
-    "created_at": "2026-02-15T19:35:06Z",
-    "deletions": 109,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44018/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44018",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44054",
+    "created_at": "2026-02-16T18:07:14Z",
+    "deletions": 93,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44054/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44054",
     "labels": [],
     "merged": false,
-    "number": 44018,
+    "number": 44054,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Refactor GPT-Neo output tracing to use capture_outputs/can_return_tuple",
-    "updated_at": "2026-02-16T20:33:37Z"
+    "title": "Flash mla interface",
+    "updated_at": "2026-02-20T11:14:39Z"
   },
   {
-    "additions": 13,
-    "author": "nexiouscaliver",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR refactors \\`SegformersPreTrainedModel\\` and \\`SegformersForImageClassification\\` to use standardized \\`@capture_outputs\\` and \\`@can_return_tuple\\` decorators for automatic output collection. ### Changes 1. **Imported \\`@capture_ou\u2026",
+    "additions": 2,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44017",
-    "created_at": "2026-02-15T19:27:22Z",
-    "deletions": 6,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44053",
+    "created_at": "2026-02-16T17:59:48Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44017/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44017",
+    "files_url": "https://github.com/huggingface/transformers/pull/44053/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44053",
     "labels": [],
-    "merged": false,
-    "number": 44017,
+    "merged": true,
+    "number": 44053,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor output tracing in segformers (#43979)",
-    "updated_at": "2026-02-20T16:51:42Z"
+    "state": "closed",
+    "title": "Fix peft conversion typo",
+    "updated_at": "2026-02-17T11:12:19Z"
   },
   {
-    "additions": 95,
-    "author": "akashadsare",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR migrates GPT-2 and its derivatives (GPTBigCode, Decision Transformer) to the new standardized output collection interface using the [@capture_outputs](vscode-file://vscode-app/usr/share/code/resources/app/out/vs/code/electron-brows\u2026",
-    "changed_files": 3,
+    "additions": 2,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Fix incorrect forward type hint for Gemma3n ## Details The type hint didn't match the actual returned class: https://github.com/huggingface/transformers/blob/349e00c1a367ce263624e525038250625dcf20c7/src/transforme\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44015",
-    "created_at": "2026-02-15T18:07:11Z",
-    "deletions": 231,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44051",
+    "created_at": "2026-02-16T17:26:24Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44015/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44015",
+    "files_url": "https://github.com/huggingface/transformers/pull/44051/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44051",
     "labels": [],
-    "merged": false,
-    "number": 44015,
-    "review_comments_count": 4,
-    "state": "open",
-    "title": "Refactor GPT2-based models to standardized output collection interface",
-    "updated_at": "2026-02-15T18:13:56Z"
+    "merged": true,
+    "number": 44051,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`chore`] Fix incorrect forward type hint for Gemma3n",
+    "updated_at": "2026-02-20T09:08:07Z"
   },
   {
-    "additions": 45,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #43992 by preventing a false missing-key report for `UMT5EncoderModel` when `encoder.embed_tokens.weight` is tied to `shared.weight`. `UMT5EncoderModel` already declares tied weights, but loading checkpoints that only carr\u2026",
-    "changed_files": 2,
+    "additions": 15,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Patch `get_text_features` for ChineseCLIP ### Details The `get_text_features` assumes that the `text_model` returns a `BaseModelOutputWithPooling`, just like is done with many other models. Currently, the `get_tex\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44014",
-    "created_at": "2026-02-15T15:17:22Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44050",
+    "created_at": "2026-02-16T17:23:31Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44014/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44014",
+    "files_url": "https://github.com/huggingface/transformers/pull/44050/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44050",
     "labels": [],
     "merged": false,
-    "number": 44014,
-    "review_comments_count": 0,
+    "number": 44050,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "[UMT5] Ignore tied encoder embedding missing-key warning",
-    "updated_at": "2026-02-16T13:40:21Z"
+    "title": "[`fix`] Patch `get_text_features` for ChineseCLIP",
+    "updated_at": "2026-02-17T09:55:17Z"
   },
   {
-    "additions": 10,
-    "author": "gabrielfruet",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 59,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `fnet` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of #43979. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": FNetLayer}` to `FNetPreTrainedModel`\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44013",
-    "created_at": "2026-02-15T13:49:53Z",
-    "deletions": 43,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44049",
+    "created_at": "2026-02-16T17:19:04Z",
+    "deletions": 112,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44013/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44013",
+    "files_url": "https://github.com/huggingface/transformers/pull/44049/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44049",
     "labels": [],
     "merged": false,
-    "number": 44013,
+    "number": 44049,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Ouptut tracing: Standardizing MobileNetv2",
-    "updated_at": "2026-02-15T13:50:59Z"
+    "state": "closed",
+    "title": "Refactor fnet model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:13Z"
   },
   {
-    "additions": 79,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
+    "additions": 4,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Fix up `__repr__` whitespace/brackets ## Reproducer ```python from transformers import AutoTokenizer, PreTrainedTokenizerBase # __repr__ via PreTrainedTokenizerBase tokenizer = AutoTokenizer.from_pretrained(\"bert-\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44012",
-    "created_at": "2026-02-15T11:20:17Z",
-    "deletions": 159,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44048",
+    "created_at": "2026-02-16T17:18:10Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44012/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44012",
+    "files_url": "https://github.com/huggingface/transformers/pull/44048/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44048",
     "labels": [],
-    "merged": false,
-    "number": 44012,
+    "merged": true,
+    "number": 44048,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor output tracing for swinv2 model",
-    "updated_at": "2026-02-17T14:15:19Z"
+    "title": "[`simple`] Fix up `__repr__` whitespace/brackets",
+    "updated_at": "2026-02-20T10:03:34Z"
   },
   {
-    "additions": 79,
+    "additions": 35,
     "author": "ManasVardhan",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44011",
-    "created_at": "2026-02-15T11:11:02Z",
-    "deletions": 146,
+    "body_excerpt": "## What does this PR do? Refactors the `bloom` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of the effort in #43979. ### Changes: - Add `_can_record_outputs` dict to `BloomPreTrainedModel` mapping `hi\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44047",
+    "created_at": "2026-02-16T17:15:25Z",
+    "deletions": 104,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44011/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44011",
+    "files_url": "https://github.com/huggingface/transformers/pull/44047/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44047",
     "labels": [],
     "merged": false,
-    "number": 44011,
+    "number": 44047,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-02-17T14:15:17Z"
+    "title": "Refactor bloom model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:14Z"
   },
   {
-    "additions": 41,
-    "author": "preetam1407",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "#43979. Refactors SqueezeBert to the standardized output collection interface: - Adds `_can_record_outputs` in `SqueezeBertPreTrainedModel` - Adds `@capture_outputs` on `SqueezeBertModel.forward` - Adds `@can_return_tuple` on task model fo\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44010",
-    "created_at": "2026-02-15T09:40:09Z",
-    "deletions": 139,
+    "additions": 24,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44046",
+    "created_at": "2026-02-16T17:07:38Z",
+    "deletions": 70,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44010/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44010",
+    "files_url": "https://github.com/huggingface/transformers/pull/44046/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44046",
     "labels": [],
     "merged": false,
-    "number": 44010,
-    "review_comments_count": 2,
-    "state": "open",
-    "title": "[SqueezeBert] Migrate to standardized output collection decorators",
-    "updated_at": "2026-04-13T08:20:09Z"
+    "number": 44046,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-02-17T14:15:23Z"
   },
   {
-    "additions": 1,
-    "author": "mariam851",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #43976 Updated the documentation to reflect the actual Python requirement (3.10+) as defined in setup.py. Changes: Updated README.md .",
-    "changed_files": 1,
+    "additions": 456215,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 4939,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44009",
-    "created_at": "2026-02-15T08:51:26Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44009/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44009",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44045",
+    "created_at": "2026-02-16T17:01:41Z",
+    "deletions": 591028,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44045/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44045",
     "labels": [],
-    "merged": true,
-    "number": 44009,
+    "merged": false,
+    "number": 44045,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "update python requirement to 3.10+ to match codebase",
-    "updated_at": "2026-02-16T13:46:56Z"
+    "title": "Flash-mla-interface",
+    "updated_at": "2026-02-16T17:11:51Z"
   },
   {
-    "additions": 26,
-    "author": "pdwi2020",
+    "additions": 49,
+    "author": "rwtarpit",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary - refactor `ResNetModel` to use `@capture_outputs` for hidden-state collection - register `_can_record_outputs` on `ResNetPreTrainedModel` with `ResNetStage` - switch `ResNetForImageClassification` and `ResNetBackbone` to `@can_\u2026",
-    "changed_files": 3,
-    "cluster_id": "cluster-43979-21",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-24",
     "cluster_ids": [
-      "cluster-43979-21"
+      "cluster-43979-24"
     ],
     "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44007",
-    "created_at": "2026-02-15T07:26:52Z",
-    "deletions": 58,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44044",
+    "created_at": "2026-02-16T16:43:19Z",
+    "deletions": 112,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44007/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44007",
+    "files_url": "https://github.com/huggingface/transformers/pull/44044/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44044",
     "labels": [],
     "merged": false,
-    "number": 44007,
+    "number": 44044,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[ResNet] Refactor output tracing to decorator-based interface",
-    "updated_at": "2026-02-19T15:49:49Z"
+    "title": "Refactor DeBERTa's output tracing interface",
+    "updated_at": "2026-02-16T18:57:29Z"
   },
   {
-    "additions": 8,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR uses torch.xlogy for better numerical handling.",
-    "changed_files": 8,
+    "additions": 170,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 31,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44006",
-    "created_at": "2026-02-15T04:07:50Z",
-    "deletions": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44043",
+    "created_at": "2026-02-16T16:23:57Z",
+    "deletions": 162,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44006/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44006",
+    "files_url": "https://github.com/huggingface/transformers/pull/44043/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44043",
     "labels": [],
     "merged": true,
-    "number": 44006,
-    "review_comments_count": 0,
+    "number": 44043,
+    "review_comments_count": 15,
     "state": "closed",
-    "title": "Use torch.xlogy ",
-    "updated_at": "2026-02-17T00:42:54Z"
+    "title": "`grouped_mm` fallback",
+    "updated_at": "2026-02-23T13:58:09Z"
   },
   {
-    "additions": 224,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR transfers grid_thw to a python list at the beginning of some functions to reduce later CUDA sync calls. Therefore, several sync calls are merged into one call.",
-    "changed_files": 16,
+    "additions": 1,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Joao is regrettably no longer with us :saluting_face: so we should really stop getting users to ping him! This PR makes @cyrilvallez responsible for `generate` issues outside of VLMs.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44005",
-    "created_at": "2026-02-15T02:34:55Z",
-    "deletions": 254,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44042",
+    "created_at": "2026-02-16T16:00:36Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44005/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44005",
+    "files_url": "https://github.com/huggingface/transformers/pull/44042/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44042",
     "labels": [],
     "merged": true,
-    "number": 44005,
-    "review_comments_count": 1,
+    "number": 44042,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Reduce reduce CUDA sync",
-    "updated_at": "2026-02-17T01:00:52Z"
+    "title": "Update assignee for generate in bug report template",
+    "updated_at": "2026-02-16T16:09:19Z"
   },
   {
-    "additions": 21,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `codegen` model as per #43979 cc @molbap <img width=\"843\" height=\"445\" alt=\"Screenshot 2026-02-15 at 5 24 52 AM\" src=\"https://github.com/user-attachments/assets/d5aeb711-96a7-4fd8-af7b-0aeac23eeeb1\" /> 2 tests are bei\u2026",
-    "changed_files": 1,
+    "additions": 469,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? Alternate PR to #43985 to be a reorder only PR. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, s\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44004",
-    "created_at": "2026-02-14T23:56:18Z",
-    "deletions": 62,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44041",
+    "created_at": "2026-02-16T15:40:41Z",
+    "deletions": 457,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44004/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44004",
+    "files_url": "https://github.com/huggingface/transformers/pull/44041/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44041",
     "labels": [],
-    "merged": false,
-    "number": 44004,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing for `codegen`",
-    "updated_at": "2026-02-17T08:56:07Z"
+    "merged": true,
+    "number": 44041,
+    "review_comments_count": 14,
+    "state": "closed",
+    "title": "refactor _inner_training_loop to smaller methods",
+    "updated_at": "2026-02-23T16:52:09Z"
   },
   {
-    "additions": 37,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `mamba` model as per #43979 cc @molbap <img width=\"859\" height=\"427\" alt=\"Screenshot 2026-02-15 at 5 12 43 AM\" src=\"https://github.com/user-attachments/assets/f23bb675-a9a3-4e21-a6c5-9804910301b4\" /> Note - Only 46 te\u2026",
-    "changed_files": 2,
+    "additions": 366,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44008 and re-enables tests",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44003",
-    "created_at": "2026-02-14T23:46:10Z",
-    "deletions": 68,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44040",
+    "created_at": "2026-02-16T12:43:28Z",
+    "deletions": 230,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44003/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44003",
+    "files_url": "https://github.com/huggingface/transformers/pull/44040/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44040",
     "labels": [],
+    "merged": true,
+    "number": 44040,
+    "review_comments_count": 14,
+    "state": "closed",
+    "title": "Fix gemma3n `get_audio_features`",
+    "updated_at": "2026-02-19T12:50:00Z"
+  },
+  {
+    "additions": 47,
+    "author": "itzyesse99-lgtm",
+    "author_association": "NONE",
+    "body_excerpt": "```diff diff --git a/transformers/modeling_utils.py b/transformers/modeling_utils.py index 1234567..8901234 100644 --- a/transformers/modeling_utils.py +++ b/transformers/modeling_utils.py @@ -10,6 +10,7 @@ from transformers import PreTrai\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44039",
+    "created_at": "2026-02-16T12:01:26Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44039/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44039",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44003,
+    "number": 44039,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `mamba`",
-    "updated_at": "2026-02-17T07:40:50Z"
+    "state": "closed",
+    "title": "AI Fix for #43979",
+    "updated_at": "2026-03-14T12:34:32Z"
   },
   {
-    "additions": 7,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `upernet` model as per #43979 cc @molbap <img width=\"856\" height=\"457\" alt=\"Screenshot 2026-02-15 at 4 51 03 AM\" src=\"https://github.com/user-attachments/assets/5dc478d7-d708-4296-a86b-c3bb252d0325\" />",
-    "changed_files": 1,
+    "additions": 23,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/43913",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44002",
-    "created_at": "2026-02-14T23:21:45Z",
-    "deletions": 20,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44037",
+    "created_at": "2026-02-16T11:02:12Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44002/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44002",
-    "labels": [],
-    "merged": false,
-    "number": 44002,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `upernet`",
-    "updated_at": "2026-02-17T08:55:16Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/44037/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44037",
+    "labels": [],
+    "merged": true,
+    "number": 44037,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Add a dim check mechanism in Transpose and fix qwen3_vl_moe weight mapping",
+    "updated_at": "2026-02-16T16:01:12Z"
   },
   {
-    "additions": 3,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`univnet` model as per #43979 cc @molbap <img width=\"848\" height=\"462\" alt=\"Screenshot 2026-02-15 at 4 19 00 AM\" src=\"https://github.com/user-attachments/assets/75848429-b9ff-49b3-a028-645aa67fc2ad\" />",
+    "additions": 0,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? So the following logic added in a previous PR #44033 could take effect ```python # `include_all` is `True` when the CI is running on a pull request, so it treats all failing tests # in the current CI run as \"new fai\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44001",
-    "created_at": "2026-02-14T22:50:39Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44036",
+    "created_at": "2026-02-16T10:14:54Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44001/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44001",
+    "files_url": "https://github.com/huggingface/transformers/pull/44036/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44036",
     "labels": [],
-    "merged": false,
-    "number": 44001,
+    "merged": true,
+    "number": 44036,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `univnet`",
-    "updated_at": "2026-02-14T23:22:13Z"
+    "state": "closed",
+    "title": "Remove `other_workflow_run_ids` for `issue_comment` in `utils/notification_service.py`",
+    "updated_at": "2026-02-16T10:24:07Z"
   },
   {
-    "additions": 8,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `vision_text_dual_encoder` model issue as per #43979 cc @molbap <img width=\"876\" height=\"292\" alt=\"Screenshot 2026-02-15 at 4 09 07 AM\" src=\"https://github.com/user-attachments/assets/11147a56-993b-4abc-b07a-ec739a53d\u2026",
-    "changed_files": 1,
+    "additions": 25,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? We already brought it back with: ```python if clean_up_tokenization_spaces: # Call custom cleanup method if it exists (e.g., for CLVP's [SPACE] token replacement) if hasattr(self, \"clean_up_tokenization\") and callab\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44000",
-    "created_at": "2026-02-14T22:44:14Z",
-    "deletions": 21,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44035",
+    "created_at": "2026-02-16T09:49:28Z",
+    "deletions": 112,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44000/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44000",
+    "files_url": "https://github.com/huggingface/transformers/pull/44035/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44035",
     "labels": [],
-    "merged": false,
-    "number": 44000,
+    "merged": true,
+    "number": 44035,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `vision_text_dual_encoder`",
-    "updated_at": "2026-02-17T07:46:33Z"
+    "state": "closed",
+    "title": "bring back our demons: clean_up_tokenization_spaces",
+    "updated_at": "2026-02-20T14:50:18Z"
   },
   {
-    "additions": 10,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `mobilenet_v1` model as per #43979 cc @molbap",
+    "additions": 18,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43999",
-    "created_at": "2026-02-14T22:20:19Z",
-    "deletions": 30,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44034",
+    "created_at": "2026-02-16T08:04:20Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43999/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43999",
+    "files_url": "https://github.com/huggingface/transformers/pull/44034/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44034",
     "labels": [],
     "merged": false,
-    "number": 43999,
+    "number": 44034,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `mobilenet_v1`",
-    "updated_at": "2026-02-17T07:52:08Z"
+    "state": "closed",
+    "title": "don't merge check workflow",
+    "updated_at": "2026-02-16T10:52:50Z"
   },
   {
-    "additions": 8,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `timm_backbone` model issue as per #43979 cc @molbap <img width=\"856\" height=\"423\" alt=\"Screenshot 2026-02-15 at 4 10 15 AM\" src=\"https://github.com/user-attachments/assets/26237c3e-7b66-4f0d-a8b5-ffad6ee7c673\" />",
-    "changed_files": 1,
+    "additions": 143,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Improve new failing test analysis for PR comment CI",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43998",
-    "created_at": "2026-02-14T22:12:30Z",
-    "deletions": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44033",
+    "created_at": "2026-02-16T07:30:33Z",
+    "deletions": 49,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43998/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43998",
+    "files_url": "https://github.com/huggingface/transformers/pull/44033/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44033",
     "labels": [],
-    "merged": false,
-    "number": 43998,
+    "merged": true,
+    "number": 44033,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `timm_backbone`",
-    "updated_at": "2026-02-21T07:29:47Z"
+    "state": "closed",
+    "title": "Improve new failing test analysis for PR comment CI",
+    "updated_at": "2026-02-16T08:02:16Z"
   },
   {
-    "additions": 12,
-    "author": "karthiksuki",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? This PR migrates the **RegNet** model to the standardized output collection interface as part of the ongoing refactoring effort in issue #43979. Specifically: - Adds the `_can_record_outputs` dictionary to `RegNetPr\u2026",
+    "additions": 3,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Update FP8 expert replacement to use `model.config.text_config` when available (VLMs), falling back to model.config if it's text-only models.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43997",
-    "created_at": "2026-02-14T19:57:54Z",
-    "deletions": 45,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44032",
+    "created_at": "2026-02-16T06:02:28Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43997/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43997",
+    "files_url": "https://github.com/huggingface/transformers/pull/44032/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44032",
     "labels": [],
-    "merged": false,
-    "number": 43997,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Migrate RegNet to standardized output tracing",
-    "updated_at": "2026-02-14T20:10:22Z"
+    "merged": true,
+    "number": 44032,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[Misc][vlms] Use text_config when initializing the fine-grained FP8Expert",
+    "updated_at": "2026-02-19T10:28:31Z"
   },
   {
-    "additions": 44,
-    "author": "beelapranay",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors FNet and CVT output tracing to use the standardized decorators where appropriate. 1. FNet now uses @capture_outputs with _can_record_outputs to collect hidden states. 2. CVT keeps manual hidden-state colle\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-21",
-    "cluster_ids": [
-      "cluster-43979-21"
-    ],
-    "cluster_role": "member",
+    "additions": 11,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`dpr` model as per #43979 cc @molbap <img width=\"853\" height=\"323\" alt=\"Screenshot 2026-02-16 at 9 13 30 AM\" src=\"https://github.com/user-attachments/assets/d658f1d0-75e8-4eac-8a12-9aeddf194dde\" />",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43996",
-    "created_at": "2026-02-14T17:55:31Z",
-    "deletions": 134,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44030",
+    "created_at": "2026-02-16T03:44:19Z",
+    "deletions": 58,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43996/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43996",
+    "files_url": "https://github.com/huggingface/transformers/pull/44030/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44030",
     "labels": [],
     "merged": false,
-    "number": 43996,
-    "review_comments_count": 0,
+    "number": 44030,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "Refactor FNet and CVT output tracing",
-    "updated_at": "2026-02-14T18:10:17Z"
+    "title": "refactor output tracing in `dpr`",
+    "updated_at": "2026-02-17T07:46:00Z"
   },
   {
     "additions": 21,
-    "author": "akeemlh",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors falcon in partial fulfillment of https://github.com/huggingface/transformers/issues/43979 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's th\u2026",
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`rwkv` model as per #43979 cc @molbap <img width=\"856\" height=\"333\" alt=\"Screenshot 2026-02-16 at 9 06 34 AM\" src=\"https://github.com/user-attachments/assets/9c8c5d41-ffbd-45f6-8b9b-1429bcb14543\" />",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43995",
-    "created_at": "2026-02-14T14:39:58Z",
-    "deletions": 87,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44029",
+    "created_at": "2026-02-16T03:37:13Z",
+    "deletions": 55,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43995/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43995",
+    "files_url": "https://github.com/huggingface/transformers/pull/44029/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44029",
     "labels": [],
     "merged": false,
-    "number": 43995,
+    "number": 44029,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Refactoring falcon model to match standardized output collection interface",
-    "updated_at": "2026-02-14T14:41:00Z"
+    "title": "refactor output tracing in `rwkv`",
+    "updated_at": "2026-02-17T07:47:02Z"
   },
   {
-    "additions": 12,
-    "author": "saurav0369",
+    "additions": 13,
+    "author": "omkar-334",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### Docs: Fix Typos and Standardize Naming This PR fixes various typos, duplicate words, and capitalization inconsistencies across the documentation to improve readability and ensure professional branding. | File | Changes Made | | :--- |\u2026",
-    "changed_files": 7,
+    "body_excerpt": "This PR refactors the`superpoint` model as per #43979 cc @molbap <img width=\"857\" height=\"334\" alt=\"Screenshot 2026-02-16 at 8 53 43 AM\" src=\"https://github.com/user-attachments/assets/17781b76-743b-4b38-923a-8db3b94ccd01\" />",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43993",
-    "created_at": "2026-02-14T10:11:40Z",
-    "deletions": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44028",
+    "created_at": "2026-02-16T03:25:14Z",
+    "deletions": 46,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43993/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43993",
+    "files_url": "https://github.com/huggingface/transformers/pull/44028/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44028",
     "labels": [],
-    "merged": true,
-    "number": 43993,
+    "merged": false,
+    "number": 44028,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "docs: fix typos across documentation files",
-    "updated_at": "2026-02-16T13:41:41Z"
+    "state": "open",
+    "title": "refactor output tracing for `superpoint`",
+    "updated_at": "2026-02-17T07:46:06Z"
   },
   {
-    "additions": 3,
-    "author": "taovinci0",
-    "author_association": "NONE",
-    "body_excerpt": "Replaces mutable default dict `weights={}` with `weights=None` and initializes inside the function. The dict is mutated via `weights[full_key] = w`, which can cause unexpected behavior across multiple calls.",
+    "additions": 6,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `speech_encoder_decoder` model as per #43979 cc @molbap <img width=\"852\" height=\"335\" alt=\"Screenshot 2026-02-16 at 8 44 05 AM\" src=\"https://github.com/user-attachments/assets/ee25c72b-b995-403c-b47b-3e9cbae0d2cc\" />",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43991",
-    "created_at": "2026-02-14T00:00:00Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44027",
+    "created_at": "2026-02-16T03:14:41Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43991/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43991",
+    "files_url": "https://github.com/huggingface/transformers/pull/44027/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44027",
     "labels": [],
     "merged": false,
-    "number": 43991,
+    "number": 44027,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: replace mutable default argument in _read_h5_weights",
-    "updated_at": "2026-02-16T11:18:06Z"
+    "state": "open",
+    "title": "refactor output tracing in `speech_encoder_decoder`",
+    "updated_at": "2026-02-17T09:04:35Z"
   },
   {
-    "additions": 10,
-    "author": "Abhijeetsingh610",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a crash in `AutoVideoProcessor` when `torchvision` is unavailable. `VIDEO_PROCESSOR_MAPPING_NAMES` can contain `None`, and `video_processor_class_from_name` was doing `if class_name in extractors`, which rais\u2026",
-    "changed_files": 2,
+    "additions": 12,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`vision_encoder_decoder` model as per #43979 cc @molbap <img width=\"849\" height=\"333\" alt=\"Screenshot 2026-02-16 at 8 28 20 AM\" src=\"https://github.com/user-attachments/assets/9f511a17-947b-46ed-82a8-8bb9bb103f15\" />",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43989",
-    "created_at": "2026-02-13T20:48:03Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44026",
+    "created_at": "2026-02-16T02:59:14Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43989/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43989",
+    "files_url": "https://github.com/huggingface/transformers/pull/44026/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44026",
     "labels": [],
     "merged": false,
-    "number": 43989,
+    "number": 44026,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix AutoVideoProcessor class lookup when torchvision is unavailable",
-    "updated_at": "2026-02-18T17:52:34Z"
+    "title": "refactor output tracing for `vision_encoder_decoder`",
+    "updated_at": "2026-02-17T09:05:22Z"
   },
   {
     "additions": 7,
-    "author": "harshaljanjani",
+    "author": "omkar-334",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **LayoutXLM:** [This PR (rm slow tokenizers)](https://github.com/huggingface/transformers/pull/40936) changed [models/auto/tokenization_auto.py](\u2026",
+    "body_excerpt": "This PR refactors the `depth_anything` model as per #43979 cc @molbap <img width=\"840\" height=\"330\" alt=\"Screenshot 2026-02-16 at 8 25 01 AM\" src=\"https://github.com/user-attachments/assets/fe7770be-70cb-4343-accb-7407c6bbb4f8\" />",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43988",
-    "created_at": "2026-02-13T20:03:28Z",
-    "deletions": 9,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44025",
+    "created_at": "2026-02-16T02:56:17Z",
+    "deletions": 23,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43988/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43988",
+    "files_url": "https://github.com/huggingface/transformers/pull/44025/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44025",
     "labels": [],
-    "merged": true,
-    "number": 43988,
+    "merged": false,
+    "number": 44025,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(testing): Fix LayoutXLM tokenization test and LightOnOCR SDPA flash test failures on main CI",
-    "updated_at": "2026-02-23T14:07:59Z"
+    "state": "open",
+    "title": "refactor output tracing for `depth_anything`",
+    "updated_at": "2026-02-17T07:46:31Z"
   },
   {
-    "additions": 47,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? Accelerator has a lot of other args that can be passed to it like fp8 support, etc, but requires extensive monkey patching downstream to make it work. This makes it easier to extend the accelerator args building met\u2026",
+    "additions": 15,
+    "author": "mmahjoub5",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR refactors the FocalNet implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43987",
-    "created_at": "2026-02-13T18:51:56Z",
-    "deletions": 38,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43987/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43987",
-    "labels": [],
-    "merged": true,
-    "number": 43987,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "split out accelerator args builder method",
-    "updated_at": "2026-02-16T14:59:03Z"
-  },
-  {
-    "additions": 1828,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? The `_inner_training_loop` method has a lot going on which makes it hard to extend for downstream developers/libraries. This PR breaks it up into smaller well described methods that are chained in the training loop.\u2026",
-    "changed_files": 5,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43985",
-    "created_at": "2026-02-13T17:55:01Z",
-    "deletions": 251,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44024",
+    "created_at": "2026-02-15T23:48:12Z",
+    "deletions": 60,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43985/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43985",
+    "files_url": "https://github.com/huggingface/transformers/pull/44024/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44024",
     "labels": [],
     "merged": false,
-    "number": 43985,
+    "number": 44024,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor inner training loop",
-    "updated_at": "2026-03-09T19:57:50Z"
+    "state": "open",
+    "title": "Focalnet standardized outputs",
+    "updated_at": "2026-02-17T08:47:48Z"
   },
   {
-    "additions": 2,
-    "author": "materight",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Removes unused `.squeeze` from VJEPA2 embeddings rotation. Currently the squeeze does nothing on video input since torch skips it if the dimension is not 1. Exporting to onnx and compiling to TensorRT instead fails\u2026",
+    "additions": 32,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the Nystromformer model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Add `_can_record_outputs` on `Nystromform\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43984",
-    "created_at": "2026-02-13T17:53:16Z",
-    "deletions": 2,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44023",
+    "created_at": "2026-02-15T21:53:48Z",
+    "deletions": 122,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43984/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43984",
+    "files_url": "https://github.com/huggingface/transformers/pull/44023/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44023",
     "labels": [],
-    "merged": true,
-    "number": 43984,
+    "merged": false,
+    "number": 44023,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove unused squeeze from VJEPA2 embeddings rotation",
-    "updated_at": "2026-02-13T21:56:01Z"
+    "title": "Refactor Nystromformer output tracing with @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:15Z"
   },
   {
-    "additions": 62,
-    "author": "Aki-07",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-21",
+    "additions": 57,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the ConvBERT model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Move `ConvBertPreTrainedModel` after layer def\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-24",
     "cluster_ids": [
-      "cluster-43979-21"
+      "cluster-43979-24"
     ],
-    "cluster_role": "canonical",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43983",
-    "created_at": "2026-02-13T17:52:45Z",
-    "deletions": 188,
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44022",
+    "created_at": "2026-02-15T21:49:57Z",
+    "deletions": 152,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43983/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43983",
+    "files_url": "https://github.com/huggingface/transformers/pull/44022/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44022",
     "labels": [],
-    "merged": true,
-    "number": 43983,
-    "review_comments_count": 4,
+    "merged": false,
+    "number": 44022,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Migrate GPT2 to standardized output capture decorators",
-    "updated_at": "2026-02-18T10:40:51Z"
+    "title": "Refactor ConvBERT output tracing with @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:17Z"
   },
   {
-    "additions": 1,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR create a `.git-blame-ignore-revs` file to ignore the following commit https://github.com/huggingface/transformers/pull/43914 when using git blame.",
-    "changed_files": 1,
+    "additions": 22,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #43906 (related to #38071) ### Problem When using `pipeline('text-generation')` with batched inference on Qwen3 (and other models where `pad_token_id == bos_token_id`), a spurious warning is emitted: > A deco\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43982",
-    "created_at": "2026-02-13T17:13:41Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44021",
+    "created_at": "2026-02-15T21:45:58Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43982/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43982",
+    "files_url": "https://github.com/huggingface/transformers/pull/44021/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44021",
     "labels": [],
     "merged": true,
-    "number": 43982,
+    "number": 44021,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "create .git-blame-ignore-revs file ",
-    "updated_at": "2026-02-16T13:08:22Z"
+    "title": "Fix false positive right-padding warning for decoder-only models in pipeline",
+    "updated_at": "2026-02-17T10:41:32Z"
   },
   {
-    "additions": 5,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Some of our image processors have a fast return for images that are already square. However, this fast return skips the `background_color` check, which causes flaky test failures because the `test_padding` test uses `self.assertRaises()` t\u2026",
+    "additions": 28,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43981",
-    "created_at": "2026-02-13T17:01:51Z",
-    "deletions": 0,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44020",
+    "created_at": "2026-02-15T21:39:17Z",
+    "deletions": 129,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43981/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43981",
+    "files_url": "https://github.com/huggingface/transformers/pull/44020/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44020",
     "labels": [],
-    "merged": true,
-    "number": 43981,
-    "review_comments_count": 3,
+    "merged": false,
+    "number": 44020,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix early image processor return not raising error",
-    "updated_at": "2026-02-16T16:40:41Z"
+    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-02-17T14:15:21Z"
   },
   {
-    "additions": 3,
-    "author": "albertvillanova",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Escape `%` in `help` for `ArgumentParser.add_argument` to fix TypeError: > TypeError: not enough arguments for format string Context: https://docs.python.org/3/library/argparse.html#help > As the help string support\u2026",
+    "additions": 17,
+    "author": "Sid-V5",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Refactored the `resnet` model to use the standardized output tracing decorators (`@capture_outputs` and `@can_return_tuple`) as part of the migration ### Changes | File | Change | |------|--------| | `modeling_resnet.py` | Migrated to `@ca\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43980",
-    "created_at": "2026-02-13T15:43:52Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44019",
+    "created_at": "2026-02-15T19:53:19Z",
+    "deletions": 62,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43980/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43980",
+    "files_url": "https://github.com/huggingface/transformers/pull/44019/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44019",
     "labels": [],
     "merged": false,
-    "number": 43980,
+    "number": 44019,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Escape % in help for ArgumentParser.add_argument to fix TypeError",
-    "updated_at": "2026-02-17T17:30:07Z"
+    "state": "open",
+    "title": "Refactor `resnet` to use `@capture_outputs` / `@can_return_tuple` output tracing",
+    "updated_at": "2026-02-15T20:01:23Z"
   },
   {
-    "additions": 0,
-    "author": "NicoSimo",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Minor fix, resolves some older references to Python3.9. Fixes #43976 ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the\u2026",
-    "changed_files": 0,
+    "additions": 41,
+    "author": "yashbora9",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "## Summary - Migrates `gpt_neo` to the standardized output collection interface as part of #43979 - Adds `@capture_outputs` decorator on `GPTNeoModel.forward` (base model) - Adds `@can_return_tuple` decorator on all wrapper model forwards\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43978",
-    "created_at": "2026-02-13T14:40:54Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44018",
+    "created_at": "2026-02-15T19:35:06Z",
+    "deletions": 109,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43978/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43978",
+    "files_url": "https://github.com/huggingface/transformers/pull/44018/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44018",
     "labels": [],
     "merged": false,
-    "number": 43978,
+    "number": 44018,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Update references to Python3.9 to Python3.10. Resolves #43976",
-    "updated_at": "2026-02-13T17:00:07Z"
+    "state": "open",
+    "title": "Refactor GPT-Neo output tracing to use capture_outputs/can_return_tuple",
+    "updated_at": "2026-02-16T20:33:37Z"
   },
   {
-    "additions": 48,
-    "author": "IlyasMoutawwakil",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 5,
+    "additions": 13,
+    "author": "nexiouscaliver",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR refactors \\`SegformersPreTrainedModel\\` and \\`SegformersForImageClassification\\` to use standardized \\`@capture_outputs\\` and \\`@can_return_tuple\\` decorators for automatic output collection. ### Changes 1. **Imported \\`@capture_ou\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43977",
-    "created_at": "2026-02-13T13:18:49Z",
-    "deletions": 23,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44017",
+    "created_at": "2026-02-15T19:27:22Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43977/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43977",
+    "files_url": "https://github.com/huggingface/transformers/pull/44017/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44017",
     "labels": [],
-    "merged": true,
-    "number": 43977,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "stable grouped_mm API",
-    "updated_at": "2026-02-16T11:09:33Z"
+    "merged": false,
+    "number": 44017,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor output tracing in segformers (#43979)",
+    "updated_at": "2026-02-20T16:51:42Z"
   },
   {
-    "additions": 1659,
-    "author": "Abubakar-rashid",
-    "author_association": "NONE",
-    "body_excerpt": "This fixes issue #43957 reported by @xvdp, where models fail to load when using [torch.device('meta')](vscode-file://vscode-app/c:/Users/Priva/AppData/Local/Programs/Microsoft%20VS%20Code/_/resources/app/out/vs/code/electron-browser/workbe\u2026",
-    "changed_files": 28,
+    "additions": 95,
+    "author": "akashadsare",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR migrates GPT-2 and its derivatives (GPTBigCode, Decision Transformer) to the new standardized output collection interface using the [@capture_outputs](vscode-file://vscode-app/usr/share/code/resources/app/out/vs/code/electron-brows\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43974",
-    "created_at": "2026-02-13T11:27:42Z",
-    "deletions": 381,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44015",
+    "created_at": "2026-02-15T18:07:11Z",
+    "deletions": 231,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43974/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43974",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44015/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44015",
+    "labels": [],
     "merged": false,
-    "number": 43974,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix: Replace torch.linspace().item() with python_linspace() to resolv\u2026",
-    "updated_at": "2026-02-16T13:46:49Z"
+    "number": 44015,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "Refactor GPT2-based models to standardized output collection interface",
+    "updated_at": "2026-02-15T18:13:56Z"
   },
   {
-    "additions": 3909,
-    "author": "MHRDYN7",
+    "additions": 45,
+    "author": "weiguangli-io",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Add support for lfm 2/2.5 audio models. (closes #43909)",
-    "changed_files": 16,
+    "body_excerpt": "## Summary Fixes #43992 by preventing a false missing-key report for `UMT5EncoderModel` when `encoder.embed_tokens.weight` is tied to `shared.weight`. `UMT5EncoderModel` already declares tied weights, but loading checkpoints that only carr\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43973",
-    "created_at": "2026-02-13T09:36:59Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44014",
+    "created_at": "2026-02-15T15:17:22Z",
     "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43973/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43973",
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44014/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44014",
+    "labels": [],
+    "merged": false,
+    "number": 44014,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[UMT5] Ignore tied encoder embedding missing-key warning",
+    "updated_at": "2026-02-16T13:40:21Z"
+  },
+  {
+    "additions": 10,
+    "author": "gabrielfruet",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44013",
+    "created_at": "2026-02-15T13:49:53Z",
+    "deletions": 43,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44013/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44013",
     "labels": [],
     "merged": false,
-    "number": 43973,
+    "number": 44013,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add lfm2.5 audio",
-    "updated_at": "2026-02-21T16:42:21Z"
+    "title": "Ouptut tracing: Standardizing MobileNetv2",
+    "updated_at": "2026-02-15T13:50:59Z"
   },
   {
-    "additions": 2219,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Following Ernie, we build 3d positions based on `mm_token_type_ids` and the models will return them by default from `processor`. We have a unified `get_vision_position` in the qwen2-vl model file, all other models j\u2026",
-    "changed_files": 45,
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 30,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43972",
-    "created_at": "2026-02-13T09:31:44Z",
-    "deletions": 1611,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44012",
+    "created_at": "2026-02-15T11:20:17Z",
+    "deletions": 159,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43972/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43972",
+    "files_url": "https://github.com/huggingface/transformers/pull/44012/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44012",
     "labels": [],
-    "merged": true,
-    "number": 43972,
-    "review_comments_count": 17,
+    "merged": false,
+    "number": 44012,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": ":rotating_light: Unify 3D position ids",
-    "updated_at": "2026-03-05T18:48:30Z"
+    "title": "Refactor output tracing for swinv2 model",
+    "updated_at": "2026-02-17T14:15:19Z"
   },
   {
-    "additions": 65,
-    "author": "caffeinism",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? 1. According to the paper, this model is designed to reference 250 contexts (10 seconds), but the current implementation uses DynamicCache without employing create_sliding_window_causal_mask, causing it to reference\u2026",
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43971",
-    "created_at": "2026-02-13T09:28:32Z",
-    "deletions": 3,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44011",
+    "created_at": "2026-02-15T11:11:02Z",
+    "deletions": 146,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43971/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43971",
-    "labels": [
-      "Audio"
+    "files_url": "https://github.com/huggingface/transformers/pull/44011/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44011",
+    "labels": [],
+    "merged": false,
+    "number": 44011,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-02-17T14:15:17Z"
+  },
+  {
+    "additions": 41,
+    "author": "preetam1407",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "#43979. Refactors SqueezeBert to the standardized output collection interface: - Adds `_can_record_outputs` in `SqueezeBertPreTrainedModel` - Adds `@capture_outputs` on `SqueezeBertModel.forward` - Adds `@can_return_tuple` on task model fo\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
     ],
-    "merged": true,
-    "number": 43971,
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44010",
+    "created_at": "2026-02-15T09:40:09Z",
+    "deletions": 139,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44010/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44010",
+    "labels": [],
+    "merged": false,
+    "number": 44010,
     "review_comments_count": 2,
-    "state": "closed",
-    "title": "[Mimi] Calibrate to ensure encoder streaming performs correctly",
-    "updated_at": "2026-02-23T14:20:01Z"
+    "state": "open",
+    "title": "[SqueezeBert] Migrate to standardized output collection decorators",
+    "updated_at": "2026-04-13T08:20:09Z"
   },
   {
-    "additions": 542,
-    "author": "jackcook",
+    "additions": 1,
+    "author": "mariam851",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds support for quantization with [Four Over Six (4/6)](https://github.com/mit-han-lab/fouroversix). Our library currently focuses on quantizing linear layers to NVFP4, including weight, activation, and gra\u2026",
-    "changed_files": 15,
+    "body_excerpt": "Fixes #43976 Updated the documentation to reflect the actual Python requirement (3.10+) as defined in setup.py. Changes: Updated README.md .",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 15,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43970",
-    "created_at": "2026-02-13T05:15:44Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44009",
+    "created_at": "2026-02-15T08:51:26Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43970/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43970",
+    "files_url": "https://github.com/huggingface/transformers/pull/44009/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44009",
     "labels": [],
     "merged": true,
-    "number": 43970,
-    "review_comments_count": 28,
+    "number": 44009,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add Four Over Six quantization integration",
-    "updated_at": "2026-02-25T09:30:09Z"
+    "title": "update python requirement to 3.10+ to match codebase",
+    "updated_at": "2026-02-16T13:46:56Z"
   },
   {
-    "additions": 6,
-    "author": "jp1924",
+    "additions": 26,
+    "author": "pdwi2020",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary - refactor `ResNetModel` to use `@capture_outputs` for hidden-state collection - register `_can_record_outputs` on `ResNetPreTrainedModel` with `ResNetStage` - switch `ResNetForImageClassification` and `ResNetBackbone` to `@can_\u2026",
+    "changed_files": 3,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44007",
+    "created_at": "2026-02-15T07:26:52Z",
+    "deletions": 58,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44007/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44007",
+    "labels": [],
+    "merged": false,
+    "number": 44007,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[ResNet] Refactor output tracing to decorator-based interface",
+    "updated_at": "2026-02-19T15:49:49Z"
+  },
+  {
+    "additions": 8,
+    "author": "cyyever",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This PR uses torch.xlogy for better numerical handling.",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43969",
-    "created_at": "2026-02-13T03:47:24Z",
-    "deletions": 9,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44006",
+    "created_at": "2026-02-15T04:07:50Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43969/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43969",
+    "files_url": "https://github.com/huggingface/transformers/pull/44006/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44006",
     "labels": [],
-    "merged": false,
-    "number": 43969,
-    "review_comments_count": 1,
+    "merged": true,
+    "number": 44006,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix for 2D weight error in embedding layer with ZeRO3",
-    "updated_at": "2026-02-20T07:32:14Z"
+    "title": "Use torch.xlogy ",
+    "updated_at": "2026-02-17T00:42:54Z"
   },
   {
-    "additions": 90,
-    "author": "qgallouedec",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Enhances `get_json_schema()` and `render_jinja_template()` to support instance methods, class methods, and static methods, not just plain functions. Previously, `get_json_schema()` only worked with standalone functi\u2026",
-    "changed_files": 3,
+    "additions": 224,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR transfers grid_thw to a python list at the beginning of some functions to reduce later CUDA sync calls. Therefore, several sync calls are merged into one call.",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43968",
-    "created_at": "2026-02-13T01:43:51Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44005",
+    "created_at": "2026-02-15T02:34:55Z",
+    "deletions": 254,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43968/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43968",
+    "files_url": "https://github.com/huggingface/transformers/pull/44005/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44005",
     "labels": [],
     "merged": true,
-    "number": 43968,
-    "review_comments_count": 2,
+    "number": 44005,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Enhance JSON schema generation to support instance, static, and class methods",
-    "updated_at": "2026-02-13T18:01:56Z"
+    "title": "Reduce reduce CUDA sync",
+    "updated_at": "2026-02-17T01:00:52Z"
   },
   {
-    "additions": 3,
-    "author": "shtse8",
+    "additions": 21,
+    "author": "omkar-334",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes the `AttributeError: 'List' object has no attribute 'dtype'` crash in `run_classification.py` when loading JSON data with list-type labels for multi-label classification (reported in #43116). ### Problem When\u2026",
+    "body_excerpt": "This PR refactors the `codegen` model as per #43979 cc @molbap <img width=\"843\" height=\"445\" alt=\"Screenshot 2026-02-15 at 5 24 52 AM\" src=\"https://github.com/user-attachments/assets/d5aeb711-96a7-4fd8-af7b-0aeac23eeeb1\" /> 2 tests are bei\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43967",
-    "created_at": "2026-02-12T23:42:11Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44004",
+    "created_at": "2026-02-14T23:56:18Z",
+    "deletions": 62,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43967/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43967",
+    "files_url": "https://github.com/huggingface/transformers/pull/44004/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44004",
     "labels": [],
     "merged": false,
-    "number": 43967,
+    "number": 44004,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix AttributeError in run_classification.py when detecting multi-label data",
-    "updated_at": "2026-02-12T23:42:11Z"
+    "title": "refactor output tracing for `codegen`",
+    "updated_at": "2026-02-17T08:56:07Z"
   },
   {
-    "additions": 10,
-    "author": "shtse8",
+    "additions": 37,
+    "author": "omkar-334",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #43867 When a model has sub-models with different naming conventions (e.g. `model.layers.26.self_attn.o_proj.weight` vs `desc_model.roberta.encoder.layers.7.norm1.weight`), `dot_natural_key` can produce lists\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43966",
-    "created_at": "2026-02-12T23:40:45Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43966/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43966",
-    "labels": [],
-    "merged": true,
-    "number": 43966,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Fix TypeError in dot_natural_key when state_dict keys have mixed types at same position",
-    "updated_at": "2026-02-13T17:39:52Z"
-  },
-  {
-    "additions": 77,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "updates `tokenizer_summary.md`: - condense intro and subword tokenization sections since this doc is mostly about BPE/Unigram/WordPiece - removes some redundant and introductory motivation content and links to the course for more info - pl\u2026",
+    "body_excerpt": "This PR refactors the `mamba` model as per #43979 cc @molbap <img width=\"859\" height=\"427\" alt=\"Screenshot 2026-02-15 at 5 12 43 AM\" src=\"https://github.com/user-attachments/assets/f23bb675-a9a3-4e21-a6c5-9804910301b4\" /> Note - Only 46 te\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43965",
-    "created_at": "2026-02-12T22:08:33Z",
-    "deletions": 200,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44003",
+    "created_at": "2026-02-14T23:46:10Z",
+    "deletions": 68,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43965/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43965",
+    "files_url": "https://github.com/huggingface/transformers/pull/44003/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44003",
     "labels": [],
-    "merged": true,
-    "number": 43965,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "[docs] tokenizer summary",
-    "updated_at": "2026-02-17T18:17:25Z"
+    "merged": false,
+    "number": 44003,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `mamba`",
+    "updated_at": "2026-02-17T07:40:50Z"
   },
   {
-    "additions": 72,
-    "author": "tohtana",
+    "additions": 7,
+    "author": "omkar-334",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR fixes a ZeRO-3 checkpoint loading failure in Transformers\u2019 conversion-mapped loading path. In affected cases, many parameters are reported as missing and are actually not restored from checkpoint (they get reinitialized). `transfor\u2026",
-    "changed_files": 2,
+    "body_excerpt": "This PR refactors the `upernet` model as per #43979 cc @molbap <img width=\"856\" height=\"457\" alt=\"Screenshot 2026-02-15 at 4 51 03 AM\" src=\"https://github.com/user-attachments/assets/5dc478d7-d708-4296-a86b-c3bb252d0325\" />",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43964",
-    "created_at": "2026-02-12T22:01:48Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44002",
+    "created_at": "2026-02-14T23:21:45Z",
+    "deletions": 20,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43964/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43964",
+    "files_url": "https://github.com/huggingface/transformers/pull/44002/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44002",
     "labels": [],
     "merged": false,
-    "number": 43964,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Fix ZeRO-3 conversion-mapped checkpoint loading",
-    "updated_at": "2026-02-13T22:58:54Z"
+    "number": 44002,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `upernet`",
+    "updated_at": "2026-02-17T08:55:16Z"
   },
   {
-    "additions": 5,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors usages of `file.readlines()` to more Pythonic equivalents (`list(file)` or direct iteration) in core tokenization and utility files. **Key Improvements:** 1. **Memory Optimization:** Replaced `list(f.readl\u2026",
-    "changed_files": 4,
+    "additions": 3,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`univnet` model as per #43979 cc @molbap <img width=\"848\" height=\"462\" alt=\"Screenshot 2026-02-15 at 4 19 00 AM\" src=\"https://github.com/user-attachments/assets/75848429-b9ff-49b3-a028-645aa67fc2ad\" />",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43963",
-    "created_at": "2026-02-12T21:16:47Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44001",
+    "created_at": "2026-02-14T22:50:39Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43963/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43963",
+    "files_url": "https://github.com/huggingface/transformers/pull/44001/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44001",
     "labels": [],
     "merged": false,
-    "number": 43963,
+    "number": 44001,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor redundant .readlines() with list()",
-    "updated_at": "2026-02-13T12:49:22Z"
+    "state": "open",
+    "title": "refactor output tracing in `univnet`",
+    "updated_at": "2026-02-14T23:22:13Z"
   },
   {
-    "additions": 59,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors `tests/cli/test_serve.py` to use native Python `assert` statements instead of legacy `unittest.TestCase` assertion methods. This modernization aligns the CLI tests with `pytest` best practices, enabling: -\u2026",
+    "additions": 8,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `vision_text_dual_encoder` model issue as per #43979 cc @molbap <img width=\"876\" height=\"292\" alt=\"Screenshot 2026-02-15 at 4 09 07 AM\" src=\"https://github.com/user-attachments/assets/11147a56-993b-4abc-b07a-ec739a53d\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43962",
-    "created_at": "2026-02-12T18:02:43Z",
-    "deletions": 64,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44000",
+    "created_at": "2026-02-14T22:44:14Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43962/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43962",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44000/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44000",
+    "labels": [],
     "merged": false,
-    "number": 43962,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Refactor CLI tests using native pytest assertions",
-    "updated_at": "2026-02-13T12:49:11Z"
+    "number": 44000,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `vision_text_dual_encoder`",
+    "updated_at": "2026-02-17T07:46:33Z"
   },
   {
-    "additions": 12,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a common Python pitfall regarding **mutable default arguments**. In Python, default arguments are evaluated only once at function definition time. If a mutable object (like a `list`) is used as a default, that\u2026",
+    "additions": 10,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `mobilenet_v1` model as per #43979 cc @molbap",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43961",
-    "created_at": "2026-02-12T17:31:19Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43999",
+    "created_at": "2026-02-14T22:20:19Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43961/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43961",
+    "files_url": "https://github.com/huggingface/transformers/pull/43999/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43999",
     "labels": [],
     "merged": false,
-    "number": 43961,
+    "number": 43999,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Replace mutable default arguments with None",
-    "updated_at": "2026-02-13T12:45:04Z"
+    "title": "refactor output tracing in `mobilenet_v1`",
+    "updated_at": "2026-02-17T07:52:08Z"
   },
   {
-    "additions": 1194,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "# Summary This PR adds the asynchronous batching feature to continuous batching (CB). Asynchronous batching, through the use of more VRAM and CUDA streams and events, greatly reduces the CPU overhead of preparing and updating batches by hi\u2026",
-    "changed_files": 11,
+    "additions": 8,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `timm_backbone` model issue as per #43979 cc @molbap <img width=\"856\" height=\"423\" alt=\"Screenshot 2026-02-15 at 4 10 15 AM\" src=\"https://github.com/user-attachments/assets/26237c3e-7b66-4f0d-a8b5-ffad6ee7c673\" />",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43960",
-    "created_at": "2026-02-12T17:20:38Z",
-    "deletions": 666,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43998",
+    "created_at": "2026-02-14T22:12:30Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43960/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43960",
+    "files_url": "https://github.com/huggingface/transformers/pull/43998/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43998",
     "labels": [],
-    "merged": true,
-    "number": 43960,
-    "review_comments_count": 39,
-    "state": "closed",
-    "title": "[CB] [Major] Asynchronous batching",
-    "updated_at": "2026-02-23T10:11:28Z"
+    "merged": false,
+    "number": 43998,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `timm_backbone`",
+    "updated_at": "2026-02-21T07:29:47Z"
   },
   {
-    "additions": 32,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes potential `UnicodeDecodeError` on Windows (and other environments where the default encoding is not UTF-8) by enforcing `encoding=\"utf-8\"` in standard `open()` calls across the core library. ## Modifications A\u2026",
-    "changed_files": 10,
+    "additions": 12,
+    "author": "karthiksuki",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? This PR migrates the **RegNet** model to the standardized output collection interface as part of the ongoing refactoring effort in issue #43979. Specifically: - Adds the `_can_record_outputs` dictionary to `RegNetPr\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43959",
-    "created_at": "2026-02-12T17:00:15Z",
-    "deletions": 32,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43997",
+    "created_at": "2026-02-14T19:57:54Z",
+    "deletions": 45,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43959/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43959",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/43997/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43997",
+    "labels": [],
     "merged": false,
-    "number": 43959,
+    "number": 43997,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Enforce explicit UTF-8 encoding in core Library to prevent Windows crashes",
-    "updated_at": "2026-02-13T12:50:43Z"
+    "state": "open",
+    "title": "Migrate RegNet to standardized output tracing",
+    "updated_at": "2026-02-14T20:10:22Z"
   },
   {
-    "additions": 2,
-    "author": "MekkCyber",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Update the quantization docker file to add kernels",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43958",
-    "created_at": "2026-02-12T16:31:42Z",
-    "deletions": 2,
+    "additions": 44,
+    "author": "beelapranay",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors FNet and CVT output tracing to use the standardized decorators where appropriate. 1. FNet now uses @capture_outputs with _can_record_outputs to collect hidden states. 2. CVT keeps manual hidden-state colle\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43996",
+    "created_at": "2026-02-14T17:55:31Z",
+    "deletions": 134,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43958/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43958",
+    "files_url": "https://github.com/huggingface/transformers/pull/43996/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43996",
     "labels": [],
     "merged": false,
-    "number": 43958,
+    "number": 43996,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[kernels] update docker file",
-    "updated_at": "2026-03-25T10:31:24Z"
+    "state": "open",
+    "title": "Refactor FNet and CVT output tracing",
+    "updated_at": "2026-02-14T18:10:17Z"
   },
   {
-    "additions": 8,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/43913 After scanning the collection [here](https://huggingface.co/collections/Qwen/qwen3-vl), all models already have merged experts but need a transpose",
-    "changed_files": 2,
+    "additions": 21,
+    "author": "akeemlh",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors falcon in partial fulfillment of https://github.com/huggingface/transformers/issues/43979 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's th\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43956",
-    "created_at": "2026-02-12T16:16:08Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43995",
+    "created_at": "2026-02-14T14:39:58Z",
+    "deletions": 87,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43956/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43956",
+    "files_url": "https://github.com/huggingface/transformers/pull/43995/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43995",
     "labels": [],
     "merged": false,
-    "number": 43956,
+    "number": 43995,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix qwen3-vl-moe weight mapping",
-    "updated_at": "2026-02-12T17:29:22Z"
+    "state": "open",
+    "title": "Refactoring falcon model to match standardized output collection interface",
+    "updated_at": "2026-02-14T14:41:00Z"
   },
   {
-    "additions": 5,
-    "author": "MekkCyber",
+    "additions": 12,
+    "author": "saurav0369",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes kernel versions for new builds with torch 2.10",
-    "changed_files": 1,
+    "body_excerpt": "### Docs: Fix Typos and Standardize Naming This PR fixes various typos, duplicate words, and capitalization inconsistencies across the documentation to improve readability and ensure professional branding. | File | Changes Made | | :--- |\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43955",
-    "created_at": "2026-02-12T16:01:58Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43993",
+    "created_at": "2026-02-14T10:11:40Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43955/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43955",
+    "files_url": "https://github.com/huggingface/transformers/pull/43993/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43993",
     "labels": [],
     "merged": true,
-    "number": 43955,
+    "number": 43993,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[kernels] fix kernel versions ",
-    "updated_at": "2026-02-13T07:32:33Z"
+    "title": "docs: fix typos across documentation files",
+    "updated_at": "2026-02-16T13:41:41Z"
   },
   {
     "additions": 3,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "The LASR model uses `layerdrop`, which we forgot to disable in the tests. Since the tests only have 2 hidden layers, and the default layerdrop chance is `0.1`, this means there's a `0.1^2 = 1%` chance in any test that doesn't call `model.e\u2026",
+    "author": "taovinci0",
+    "author_association": "NONE",
+    "body_excerpt": "Replaces mutable default dict `weights={}` with `weights=None` and initializes inside the function. The dict is mutated via `weights[full_key] = w`, which can cause unexpected behavior across multiple calls.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43954",
-    "created_at": "2026-02-12T14:45:01Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43991",
+    "created_at": "2026-02-14T00:00:00Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43954/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43954",
+    "files_url": "https://github.com/huggingface/transformers/pull/43991/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43991",
     "labels": [],
-    "merged": true,
-    "number": 43954,
+    "merged": false,
+    "number": 43991,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix LASR test layerdrop issue",
-    "updated_at": "2026-02-12T17:03:42Z"
+    "title": "fix: replace mutable default argument in _read_h5_weights",
+    "updated_at": "2026-02-16T11:18:06Z"
   },
   {
-    "additions": 1,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/43931, no transpose needed after standardizing the model impl to inherit from Qwen3-MoE",
-    "changed_files": 1,
+    "additions": 10,
+    "author": "Abhijeetsingh610",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a crash in `AutoVideoProcessor` when `torchvision` is unavailable. `VIDEO_PROCESSOR_MAPPING_NAMES` can contain `None`, and `video_processor_class_from_name` was doing `if class_name in extractors`, which rais\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43953",
-    "created_at": "2026-02-12T14:43:59Z",
-    "deletions": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43989",
+    "created_at": "2026-02-13T20:48:03Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43953/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43953",
+    "files_url": "https://github.com/huggingface/transformers/pull/43989/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43989",
     "labels": [],
     "merged": false,
-    "number": 43953,
+    "number": 43989,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix conversion mapping for Qwen3VL-MoE",
-    "updated_at": "2026-02-16T18:02:05Z"
+    "state": "open",
+    "title": "Fix AutoVideoProcessor class lookup when torchvision is unavailable",
+    "updated_at": "2026-02-18T17:52:34Z"
   },
   {
-    "additions": 78,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/trl/issues/5088 We shouldn't use `rope_deltas` from prev `generation/forward` call if this is a new generation from scratch. This is already correctly implemented in `compute_3d_\u2026",
-    "changed_files": 14,
+    "additions": 7,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **LayoutXLM:** [This PR (rm slow tokenizers)](https://github.com/huggingface/transformers/pull/40936) changed [models/auto/tokenization_auto.py](\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43952",
-    "created_at": "2026-02-12T14:34:52Z",
-    "deletions": 31,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43988",
+    "created_at": "2026-02-13T20:03:28Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43952/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43952",
+    "files_url": "https://github.com/huggingface/transformers/pull/43988/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43988",
     "labels": [],
     "merged": true,
-    "number": 43952,
+    "number": 43988,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix qwen-vl position ids when generating several times",
-    "updated_at": "2026-02-12T16:22:39Z"
+    "title": "fix(testing): Fix LayoutXLM tokenization test and LightOnOCR SDPA flash test failures on main CI",
+    "updated_at": "2026-02-23T14:07:59Z"
   },
   {
-    "additions": 4,
-    "author": "lordaarush",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #43883 After #42270, `all_tied_weights_keys` is initialized in `post_init()`, but remote models loaded with `trust_remote_code=True` don't always call `post_init()` properly, causing `AttributeError` when load\u2026",
+    "additions": 47,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? Accelerator has a lot of other args that can be passed to it like fp8 support, etc, but requires extensive monkey patching downstream to make it work. This makes it easier to extend the accelerator args building met\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43951",
-    "created_at": "2026-02-12T14:32:00Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43951/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43951",
-    "labels": [],
-    "merged": false,
-    "number": 43951,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix AttributeError for remote models with trust_remote_code=True",
-    "updated_at": "2026-02-12T15:39:38Z"
-  },
-  {
-    "additions": 61,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a collection of spelling errors found throughout `src/transformers` in docstrings, comments, and user-facing error messages. ## Modifications Corrected the following typos across multiple files in `src/transfo\u2026",
-    "changed_files": 45,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43949",
-    "created_at": "2026-02-12T14:15:42Z",
-    "deletions": 61,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43987",
+    "created_at": "2026-02-13T18:51:56Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43949/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43949",
+    "files_url": "https://github.com/huggingface/transformers/pull/43987/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43987",
     "labels": [],
     "merged": true,
-    "number": 43949,
-    "review_comments_count": 5,
+    "number": 43987,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix typos in docstrings, comments, and error messages",
-    "updated_at": "2026-02-12T16:26:10Z"
+    "title": "split out accelerator args builder method",
+    "updated_at": "2026-02-16T14:59:03Z"
   },
   {
-    "additions": 147,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes `get_num_of_image_tokens` in idefics3 and adds a test. Aloong the way fixes a few more models Reported in https://github.com/vllm-project/vllm/pull/34358",
-    "changed_files": 25,
+    "additions": 1828,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? The `_inner_training_loop` method has a lot going on which makes it hard to extend for downstream developers/libraries. This PR breaks it up into smaller well described methods that are chained in the training loop.\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43948",
-    "created_at": "2026-02-12T13:52:37Z",
-    "deletions": 69,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43985",
+    "created_at": "2026-02-13T17:55:01Z",
+    "deletions": 251,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43948/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43948",
+    "files_url": "https://github.com/huggingface/transformers/pull/43985/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43985",
     "labels": [],
-    "merged": true,
-    "number": 43948,
+    "merged": false,
+    "number": 43985,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `get_number_of_image_tokens`",
-    "updated_at": "2026-02-12T16:23:37Z"
+    "title": "Refactor inner training loop",
+    "updated_at": "2026-03-09T19:57:50Z"
   },
   {
-    "additions": 42,
-    "author": "casinca",
+    "additions": 2,
+    "author": "materight",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Hello, This is a complementary PR to #42769 (not yet pushed, at the time of writing) in order to fix #42754 Tests passed (at least the ones with a test.) <!-- Congratulations! You've made it this far! You're not qui\u2026",
-    "changed_files": 7,
+    "body_excerpt": "# What does this PR do? Removes unused `.squeeze` from VJEPA2 embeddings rotation. Currently the squeeze does nothing on video input since torch skips it if the dimension is not 1. Exporting to onnx and compiling to TensorRT instead fails\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43947",
-    "created_at": "2026-02-12T13:20:24Z",
-    "deletions": 21,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43984",
+    "created_at": "2026-02-13T17:53:16Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43947/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43947",
+    "files_url": "https://github.com/huggingface/transformers/pull/43984/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43984",
     "labels": [],
     "merged": true,
-    "number": 43947,
+    "number": 43984,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: Better weight decay exclusion in `run_*_no\u2011trainer.py` examples",
-    "updated_at": "2026-02-12T16:24:43Z"
+    "title": "Remove unused squeeze from VJEPA2 embeddings rotation",
+    "updated_at": "2026-02-13T21:56:01Z"
   },
   {
-    "additions": 5,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix docker files: some issues for `[dev-torch]` and `kernels`",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43946",
-    "created_at": "2026-02-12T13:01:56Z",
-    "deletions": 5,
+    "additions": 62,
+    "author": "Aki-07",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-24",
+    "cluster_ids": [
+      "cluster-43979-24"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43983",
+    "created_at": "2026-02-13T17:52:45Z",
+    "deletions": 188,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43946/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43946",
+    "files_url": "https://github.com/huggingface/transformers/pull/43983/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43983",
     "labels": [],
     "merged": true,
-    "number": 43946,
-    "review_comments_count": 0,
+    "number": 43983,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Fix docker files",
-    "updated_at": "2026-02-12T13:11:21Z"
+    "title": "Migrate GPT2 to standardized output capture decorators",
+    "updated_at": "2026-02-18T10:40:51Z"
   },
   {
-    "additions": 25,
-    "author": "Cyrilvallez",
+    "additions": 1,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Some parts of the computation were not so exact. It should not change anything in general, but may improve perfs on constrained environments",
+    "body_excerpt": "# What does this PR do? This PR create a `.git-blame-ignore-revs` file to ignore the following commit https://github.com/huggingface/transformers/pull/43914 when using git blame.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43945",
-    "created_at": "2026-02-12T12:56:19Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43982",
+    "created_at": "2026-02-13T17:13:41Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43945/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43945",
+    "files_url": "https://github.com/huggingface/transformers/pull/43982/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43982",
     "labels": [],
     "merged": true,
-    "number": 43945,
+    "number": 43982,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Improve memory allocator during loading",
-    "updated_at": "2026-02-13T11:25:07Z"
+    "title": "create .git-blame-ignore-revs file ",
+    "updated_at": "2026-02-16T13:08:22Z"
   },
   {
-    "additions": 4,
+    "additions": 5,
     "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "We have flaky test failures in `tests/models/qwen3_omni_moe/test_modeling_qwen3_omni_moe.py::Qwen3OmniMoeThinkerForConditionalGenerationModelTest::test_generate_continue_from_past_key_values`. The cause is that the logic in this test drops\u2026",
+    "body_excerpt": "Some of our image processors have a fast return for images that are already square. However, this fast return skips the `background_color` check, which causes flaky test failures because the `test_padding` test uses `self.assertRaises()` t\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43944",
-    "created_at": "2026-02-12T12:55:52Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43944/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43944",
-    "labels": [],
-    "merged": true,
-    "number": 43944,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Fix flaky test for multimodal LLMs",
-    "updated_at": "2026-02-12T13:30:17Z"
-  },
-  {
-    "additions": 6,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Replaces legacy `.format()` calls with f-strings in several model conversion scripts (`convert_*.py`). ## Modifications Used `flynt` to apply safe transformations to string literals in: - `src/transformers/models/im\u2026",
-    "changed_files": 6,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43943",
-    "created_at": "2026-02-12T11:49:23Z",
-    "deletions": 6,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43981",
+    "created_at": "2026-02-13T17:01:51Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43943/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43943",
+    "files_url": "https://github.com/huggingface/transformers/pull/43981/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43981",
     "labels": [],
     "merged": true,
-    "number": 43943,
-    "review_comments_count": 1,
+    "number": 43981,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Modernize string formatting (f-strings) in conversion scripts",
-    "updated_at": "2026-02-12T14:20:49Z"
+    "title": "Fix early image processor return not raising error",
+    "updated_at": "2026-02-16T16:40:41Z"
   },
   {
-    "additions": 20,
-    "author": "Cyrilvallez",
+    "additions": 3,
+    "author": "albertvillanova",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Re-add the overwritten test that was mistakenly removed in https://github.com/huggingface/transformers/pull/43916",
+    "body_excerpt": "# What does this PR do? Escape `%` in `help` for `ArgumentParser.add_argument` to fix TypeError: > TypeError: not enough arguments for format string Context: https://docs.python.org/3/library/argparse.html#help > As the help string support\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43942",
-    "created_at": "2026-02-12T11:20:08Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43980",
+    "created_at": "2026-02-13T15:43:52Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43942/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43942",
+    "files_url": "https://github.com/huggingface/transformers/pull/43980/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43980",
     "labels": [],
-    "merged": true,
-    "number": 43942,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix bark test",
-    "updated_at": "2026-02-12T11:34:22Z"
-  },
-  {
-    "additions": 113,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds explicit `encoding=\"utf-8\"` to file I/O operations in several `examples/pytorch/` scripts. ## The Problem On Windows, `open()` defaults to the system encoding (often `cp1252`). This causes crashes (`UnicodeDeco\u2026",
-    "changed_files": 9,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43941",
-    "created_at": "2026-02-12T11:09:14Z",
-    "deletions": 48,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43941/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43941",
-    "labels": [
-      "Code agent slop"
-    ],
     "merged": false,
-    "number": 43941,
-    "review_comments_count": 22,
+    "number": 43980,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix UnicodeDecodeError in PyTorch examples on Windows",
-    "updated_at": "2026-02-13T12:50:27Z"
+    "title": "Escape % in help for ArgumentParser.add_argument to fix TypeError",
+    "updated_at": "2026-02-17T17:30:07Z"
   }
 ]