diff --git "a/data/prs.json" "b/data/prs.json"
--- "a/data/prs.json"
+++ "b/data/prs.json"
@@ -25364,5 +25364,20522 @@
     "state": "open",
     "title": "Refactored vits to match standardized output collection interface",
     "updated_at": "2026-02-19T12:18:56Z"
+  },
+  {
+    "additions": 79,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44152",
+    "created_at": "2026-02-19T09:37:51Z",
+    "deletions": 45,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44152/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44152",
+    "labels": [],
+    "merged": true,
+    "number": 44152,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "AutoGrad support for grouped_mm fallback",
+    "updated_at": "2026-02-20T11:15:23Z"
+  },
+  {
+    "additions": 58,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the BioGPT m\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44151",
+    "created_at": "2026-02-19T06:55:43Z",
+    "deletions": 134,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44151/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44151",
+    "labels": [],
+    "merged": false,
+    "number": 44151,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor BioGPT output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:01Z"
+  },
+  {
+    "additions": 22,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the MPT mode\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44150",
+    "created_at": "2026-02-19T06:54:09Z",
+    "deletions": 73,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44150/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44150",
+    "labels": [],
+    "merged": false,
+    "number": 44150,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor MPT output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:02Z"
+  },
+  {
+    "additions": 85,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CPMAnt m\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44149",
+    "created_at": "2026-02-19T06:51:06Z",
+    "deletions": 201,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44149/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44149",
+    "labels": [],
+    "merged": false,
+    "number": 44149,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor CPMAnt output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:03Z"
+  },
+  {
+    "additions": 33,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the Bros mod\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44148",
+    "created_at": "2026-02-19T06:46:24Z",
+    "deletions": 124,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44148/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44148",
+    "labels": [],
+    "merged": false,
+    "number": 44148,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor Bros output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:05Z"
+  },
+  {
+    "additions": 11,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CTRL mod\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44147",
+    "created_at": "2026-02-19T06:45:32Z",
+    "deletions": 47,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44147/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44147",
+    "labels": [],
+    "merged": false,
+    "number": 44147,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor CTRL output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:06Z"
+  },
+  {
+    "additions": 38,
+    "author": "khushali9",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? When using a step-based evaluation strategy (IntervalStrategy.STEPS), the trainer may skip evaluation at the final step if the last step does not align with eval_steps. This avoids missing the final evaluation while\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 18,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44146",
+    "created_at": "2026-02-19T05:29:21Z",
+    "deletions": 11,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44146/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44146",
+    "labels": [],
+    "merged": true,
+    "number": 44146,
+    "review_comments_count": 14,
+    "state": "closed",
+    "title": "Ensure final evaluation runs with step-based evaluation strategy",
+    "updated_at": "2026-03-26T16:30:40Z"
+  },
+  {
+    "additions": 400,
+    "author": "balvisio",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for sequence packing in the ESM2 model. Currently, the RotaryEmbedding class of the ESM2 model supports BSHD format. This PR makes the RotayEmbedding class aware of the`position_ids` and builds\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44145",
+    "created_at": "2026-02-19T02:58:50Z",
+    "deletions": 216,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44145/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44145",
+    "labels": [],
+    "merged": true,
+    "number": 44145,
+    "review_comments_count": 9,
+    "state": "closed",
+    "title": "Add THD support in ESM",
+    "updated_at": "2026-04-09T14:40:26Z"
+  },
+  {
+    "additions": 1481,
+    "author": "TinderZ",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds 5 Chinese translations for common NLP task tutorials that were missing from the `docs/source/zh/tasks/` directory. The following files are added: - `tasks/sequence_classification.md` - \u6587\u672c\u5206\u7c7b - `tasks/tok\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44144",
+    "created_at": "2026-02-19T02:35:08Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44144/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44144",
+    "labels": [],
+    "merged": true,
+    "number": 44144,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[docs] Add Chinese translations for common NLP task tutorials",
+    "updated_at": "2026-02-20T16:50:29Z"
+  },
+  {
+    "additions": 2,
+    "author": "nightcityblade",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes minor typos in the `GenerationConfig` class docstring: - \"overriden\" \u2192 \"overridden\" - \"field that are\" \u2192 \"fields that are\" - \"Arg:\" \u2192 \"Args:\" (consistent with the rest of the docstring) No code changes, docum\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44143",
+    "created_at": "2026-02-18T23:07:23Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44143/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44143",
+    "labels": [],
+    "merged": true,
+    "number": 44143,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[docs] Fix typos in GenerationConfig docstring",
+    "updated_at": "2026-02-19T13:24:09Z"
+  },
+  {
+    "additions": 72,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? so @Deep-unlearning noticed, benchmarking for the Open ASR leaderbaord, that the current implem is particularly slow. That would make sense since we go through every layer of the encoder forward, and that the stream\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44142",
+    "created_at": "2026-02-18T21:44:11Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44142/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44142",
+    "labels": [],
+    "merged": false,
+    "number": 44142,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[voxtral-realtime] get more perfs!",
+    "updated_at": "2026-02-23T17:25:45Z"
+  },
+  {
+    "additions": 42,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44102 (original account: @fumadari). ## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44141",
+    "created_at": "2026-02-18T21:14:53Z",
+    "deletions": 154,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44141/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44141",
+    "labels": [],
+    "merged": false,
+    "number": 44141,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor ibert output tracing with capture_outputs",
+    "updated_at": "2026-02-22T02:28:47Z"
+  },
+  {
+    "additions": 66,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44104 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs`\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44140",
+    "created_at": "2026-02-18T21:14:50Z",
+    "deletions": 207,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44140/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44140",
+    "labels": [],
+    "merged": false,
+    "number": 44140,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor megatron_bert to use automatic output tracing",
+    "updated_at": "2026-02-22T02:28:48Z"
+  },
+  {
+    "additions": 39,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44105 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44139",
+    "created_at": "2026-02-18T21:14:46Z",
+    "deletions": 127,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44139/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44139",
+    "labels": [],
+    "merged": false,
+    "number": 44139,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor lilt to use automatic output tracing",
+    "updated_at": "2026-02-22T02:28:48Z"
+  },
+  {
+    "additions": 51,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44106 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44138",
+    "created_at": "2026-02-18T21:14:42Z",
+    "deletions": 132,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44138/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44138",
+    "labels": [],
+    "merged": false,
+    "number": 44138,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor yoso to use automatic output tracing",
+    "updated_at": "2026-02-22T02:28:49Z"
+  },
+  {
+    "additions": 43,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44107 (original account: @fumadari). ## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44137",
+    "created_at": "2026-02-18T21:14:39Z",
+    "deletions": 113,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44137/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44137",
+    "labels": [],
+    "merged": false,
+    "number": 44137,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(mra): use output tracing decorators",
+    "updated_at": "2026-02-22T02:28:50Z"
+  },
+  {
+    "additions": 37,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44108 (original account: @fumadari). ## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44136",
+    "created_at": "2026-02-18T21:14:35Z",
+    "deletions": 86,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44136/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44136",
+    "labels": [],
+    "merged": false,
+    "number": 44136,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(vitdet): use output tracing decorators",
+    "updated_at": "2026-02-22T02:28:50Z"
+  },
+  {
+    "additions": 48,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44109 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `H\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44135",
+    "created_at": "2026-02-18T21:14:31Z",
+    "deletions": 87,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44135/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44135",
+    "labels": [],
+    "merged": false,
+    "number": 44135,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
+    "updated_at": "2026-02-22T02:28:51Z"
+  },
+  {
+    "additions": 28,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44110 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44134",
+    "created_at": "2026-02-18T21:14:27Z",
+    "deletions": 101,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44134/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44134",
+    "labels": [],
+    "merged": false,
+    "number": 44134,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(tvp): use capture_outputs for output tracing",
+    "updated_at": "2026-02-22T02:28:51Z"
+  },
+  {
+    "additions": 30,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44111 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` dec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44133",
+    "created_at": "2026-02-18T21:12:22Z",
+    "deletions": 59,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44133/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44133",
+    "labels": [],
+    "merged": false,
+    "number": 44133,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(poolformer): use capture_outputs for output tracing",
+    "updated_at": "2026-02-22T02:28:52Z"
+  },
+  {
+    "additions": 13,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? #43674 broke voxtral processor",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44132",
+    "created_at": "2026-02-18T20:13:15Z",
+    "deletions": 34,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44132/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44132",
+    "labels": [],
+    "merged": true,
+    "number": 44132,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "[voxtral] fix voxtral proc",
+    "updated_at": "2026-02-19T16:41:53Z"
+  },
+  {
+    "additions": 2,
+    "author": "cluster2600",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What Two small corrections in `docs/source/en/quantization/overview.md`: 1. **Typo fix**: `AuoQuant Notebook` \u2192 `AutoQuant Notebook` in the *User-Friendly Quantization Tools* section. The letter `t` was missing from the link text. 2. **\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44131",
+    "created_at": "2026-02-18T19:25:52Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44131/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44131",
+    "labels": [],
+    "merged": true,
+    "number": 44131,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: fix typo 'AuoQuant' \u2192 'AutoQuant' and clarify FINEGRAINED_FP8 library column",
+    "updated_at": "2026-02-18T20:49:47Z"
+  },
+  {
+    "additions": 302,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. This PR is the first big step towards removing the `cache_position` everywhere, as they are not needed in general and everything can be inferred from the cache itself. The major changes are the fol\u2026",
+    "changed_files": 23,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44130",
+    "created_at": "2026-02-18T11:58:54Z",
+    "deletions": 886,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44130/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44130",
+    "labels": [],
+    "merged": true,
+    "number": 44130,
+    "review_comments_count": 16,
+    "state": "closed",
+    "title": "[generate] Completely stop relying on `cache_position` to prepare inputs",
+    "updated_at": "2026-02-20T18:46:19Z"
+  },
+  {
+    "additions": 76,
+    "author": "preetam1407",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors SpeechT5 to the standardized output tracing interface. - Adds `@capture_outputs` to base encoder/decoder forwards. - Adds `_can_record_outputs` mappings for hidden states and attentions. - Adds `@can_retur\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-11",
+    "cluster_ids": [
+      "cluster-43979-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44129",
+    "created_at": "2026-02-18T11:24:13Z",
+    "deletions": 222,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44129/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44129",
+    "labels": [],
+    "merged": false,
+    "number": 44129,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor SpeechT5 output tracing to standardized output capture",
+    "updated_at": "2026-02-18T11:25:19Z"
+  },
+  {
+    "additions": 59,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Just makes sure we trigger dev version update",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44128",
+    "created_at": "2026-02-18T10:42:21Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44128/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44128",
+    "labels": [],
+    "merged": false,
+    "number": 44128,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "update release workflow",
+    "updated_at": "2026-03-30T13:40:19Z"
+  },
+  {
+    "additions": 3,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "when the model_type isn't in `TOKENIZER_MAPPING_NAMES` (ex. \"llama\"), `TOKENIZER_MAPPING_NAMES.get(\"llama\", \"\")` --> \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44127",
+    "created_at": "2026-02-18T10:41:48Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44127/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44127",
+    "labels": [],
+    "merged": true,
+    "number": 44127,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "AutoTokenizer ignores config when model_type is None",
+    "updated_at": "2026-02-18T14:47:52Z"
+  },
+  {
+    "additions": 17,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Let's simplify after https://github.com/huggingface/transformers/pull/42848",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44126",
+    "created_at": "2026-02-18T09:58:49Z",
+    "deletions": 40,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44126/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44126",
+    "labels": [],
+    "merged": true,
+    "number": 44126,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Simplify input preparation in generate",
+    "updated_at": "2026-02-18T10:30:48Z"
+  },
+  {
+    "additions": 8,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43986",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44125",
+    "created_at": "2026-02-18T09:34:54Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44125/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44125",
+    "labels": [],
+    "merged": true,
+    "number": 44125,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Raise informative error when loading video processors",
+    "updated_at": "2026-02-20T08:23:35Z"
+  },
+  {
+    "additions": 10,
+    "author": "mariam851",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to force evaluation at the end of training, even if the last step doesn't align with eval_steps. Changes: training_args.py: Added eval_on_end field. trainer.py: Added logic to call evaluat\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44124",
+    "created_at": "2026-02-18T08:52:23Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44124/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44124",
+    "labels": [],
+    "merged": false,
+    "number": 44124,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "feat: add eval_on_end to Trainer for final evaluation",
+    "updated_at": "2026-02-18T14:14:16Z"
+  },
+  {
+    "additions": 33,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR avoids device sync in training loss accumulation by ```torch.where```. The `is_torch_xla_available` condition is also removed.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44123",
+    "created_at": "2026-02-18T08:22:57Z",
+    "deletions": 22,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44123/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44123",
+    "labels": [],
+    "merged": false,
+    "number": 44123,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Avoid device sync in training loss accumulation",
+    "updated_at": "2026-03-30T07:57:16Z"
+  },
+  {
+    "additions": 158,
+    "author": "adityuhkapoor",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Adds 4-bit embedding quantization for BitsAndBytes, mirroring TorchAO's existing `include_input_output_embeddings` and `untie_embedding_weights` pattern (PRs #37802, #37905, #37935). Large-vocabulary models (Llama 3\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44122",
+    "created_at": "2026-02-18T06:35:09Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44122/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44122",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44122,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add BnB 4-bit embedding quantization support",
+    "updated_at": "2026-02-18T14:27:25Z"
+  },
+  {
+    "additions": 14,
+    "author": "tirth8205",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #34920 After applying `normalize()`, images can have negative values. Calling `resize()` on such images fails because it internally converts to PIL, which requires values in [0, 1] or [0, 255]. ### Fix When the image has values outsi\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44120",
+    "created_at": "2026-02-17T23:56:48Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44120/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44120",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44120,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: allow image_transforms.resize to handle negative values after normalization",
+    "updated_at": "2026-02-18T14:08:54Z"
+  },
+  {
+    "additions": 1,
+    "author": "tirth8205",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44117 `TOKENIZER_MAPPING_NAMES.get(config_model_type, \"\")` returns `None` when the key exists with value `None`, causing `AttributeError: 'NoneType' object has no attribute 'replace'` when loading models like `google/siglip2-so400m-\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44119",
+    "created_at": "2026-02-17T23:53:20Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44119/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44119",
+    "labels": [],
+    "merged": false,
+    "number": 44119,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: handle None value from TOKENIZER_MAPPING_NAMES.get() in AutoTokenizer",
+    "updated_at": "2026-02-18T14:04:47Z"
+  },
+  {
+    "additions": 32,
+    "author": "tirth8205",
+    "author_association": "NONE",
+    "body_excerpt": "## Fix Fixes #44079 When a `ModelOutput` dataclass field is initialized as `None`, it is correctly excluded from the OrderedDict keys. However, **subsequently setting that field to a non-None value** via attribute assignment (e.g. `outputs\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44118",
+    "created_at": "2026-02-17T23:31:31Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44118/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44118",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44118,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: ModelOutput keys not updated when setting previously-None dataclass fields",
+    "updated_at": "2026-02-18T14:18:12Z"
+  },
+  {
+    "additions": 27,
+    "author": "dtiourine",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Migrate Flaubert to the @capture_outputs and @can_return_tuple decorator pattern for output handling, as part of #43979. # What does this PR do? - Add `_can_record_outputs = {\"attentions\": MultiHeadAttention}` on `FlaubertPreTrainedModel`\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44116",
+    "created_at": "2026-02-17T21:52:13Z",
+    "deletions": 102,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44116/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44116",
+    "labels": [],
+    "merged": false,
+    "number": 44116,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[WIP] [Flaubert] Refactor output tracing to decorator-based interface",
+    "updated_at": "2026-02-17T21:53:23Z"
+  },
+  {
+    "additions": 2,
+    "author": "Deep-unlearning",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - Fix broken `[chat template](./chat_templating)` links in `docs/source/en/tasks/` - `./chat_templating` resolves within `tasks/` (doesn't exist); corrected to `../chat_templating` - Affected files: `tasks/image_text_to_text.md`\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44115",
+    "created_at": "2026-02-17T21:32:55Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44115/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44115",
+    "labels": [],
+    "merged": true,
+    "number": 44115,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[docs] fix broken chat_templating links in tasks docs",
+    "updated_at": "2026-02-23T16:27:57Z"
+  },
+  {
+    "additions": 716,
+    "author": "23atharvaS",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR migrates the `wav2vec2` family to the standardized output-capturing interface (`@capture_outputs` + `@can_return_tuple`) and includes follow-up compatibility fixes required to make full CI green. ## What changed ### Core\u2026",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44114",
+    "created_at": "2026-02-17T21:17:35Z",
+    "deletions": 1237,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44114/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44114",
+    "labels": [],
+    "merged": false,
+    "number": 44114,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Migrate wav2vec2, wav2vec2_conformer, and wav2vec2_bert to standardized output collection decorators",
+    "updated_at": "2026-02-18T20:34:53Z"
+  },
+  {
+    "additions": 5,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Updates the stale `test_device_override` in `test_processing_granite_speech.py` to verify that the device param controls where speech inputs are placed, r\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44113",
+    "created_at": "2026-02-17T20:01:32Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44113/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44113",
+    "labels": [],
+    "merged": true,
+    "number": 44113,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix(testing): Update stale device override test in GraniteSpeech",
+    "updated_at": "2026-04-18T08:32:21Z"
+  },
+  {
+    "additions": 30,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `PoolFormerLayer` to return a single tensor instead of a 1-tuple - Simplifies `\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44111",
+    "created_at": "2026-02-17T19:38:02Z",
+    "deletions": 59,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44111/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44111",
+    "labels": [],
+    "merged": false,
+    "number": 44111,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(poolformer): use capture_outputs for output tracing",
+    "updated_at": "2026-02-18T21:19:22Z"
+  },
+  {
+    "additions": 28,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `TvpAttention` to always return `(output, attention_probs)` (hooks decide what to capt\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44110",
+    "created_at": "2026-02-17T19:32:55Z",
+    "deletions": 101,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44110/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44110",
+    "labels": [],
+    "merged": false,
+    "number": 44110,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(tvp): use capture_outputs for output tracing",
+    "updated_at": "2026-02-18T21:19:24Z"
+  },
+  {
+    "additions": 48,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `HGNetV2Encoder` by removing `return_dict` parameter (always returns `BaseModelOutputWithNoAttention`)\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44109",
+    "created_at": "2026-02-17T19:23:03Z",
+    "deletions": 87,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44109/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44109",
+    "labels": [],
+    "merged": false,
+    "number": 44109,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
+    "updated_at": "2026-02-18T21:19:25Z"
+  },
+  {
+    "additions": 33,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_attentions`/`return_dict` resolution - Adds `_can_record_outputs = {\"attentions\": VitDetAttention}`\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44108",
+    "created_at": "2026-02-17T19:15:00Z",
+    "deletions": 82,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44108/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44108",
+    "labels": [],
+    "merged": false,
+    "number": 44108,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(vitdet): use output tracing decorators",
+    "updated_at": "2026-02-18T21:19:27Z"
+  },
+  {
+    "additions": 40,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture_outputs` decorators - Simplifies `MraEncoder` to a plain loop returning a single tensor, removing `\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44107",
+    "created_at": "2026-02-17T19:04:42Z",
+    "deletions": 112,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44107/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44107",
+    "labels": [],
+    "merged": false,
+    "number": 44107,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(mra): use output tracing decorators",
+    "updated_at": "2026-02-18T21:19:29Z"
+  },
+  {
+    "additions": 47,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 5 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44106",
+    "created_at": "2026-02-17T18:59:25Z",
+    "deletions": 132,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44106/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44106",
+    "labels": [],
+    "merged": false,
+    "number": 44106,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor yoso to use automatic output tracing",
+    "updated_at": "2026-02-18T21:19:30Z"
+  },
+  {
+    "additions": 39,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 3 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44105",
+    "created_at": "2026-02-17T18:54:40Z",
+    "deletions": 127,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44105/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44105",
+    "labels": [],
+    "merged": false,
+    "number": 44105,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor lilt to use automatic output tracing",
+    "updated_at": "2026-02-18T21:19:32Z"
+  },
+  {
+    "additions": 66,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 8 wrapper model classes, eliminating m\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44104",
+    "created_at": "2026-02-17T18:43:44Z",
+    "deletions": 207,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44104/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44104",
+    "labels": [],
+    "merged": false,
+    "number": 44104,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor megatron_bert to use automatic output tracing",
+    "updated_at": "2026-02-18T21:19:34Z"
+  },
+  {
+    "additions": 53,
+    "author": "engmohamedsalah",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44052 Now and then, the indexer ran into trouble switching between masks and cache. Most of the test failures came from these hiccups: - Indexer cache: the old if seq_len > 1: reset cache heuristic broke assisted decoding (multi-tok\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44103",
+    "created_at": "2026-02-17T18:04:48Z",
+    "deletions": 76,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44103/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44103",
+    "labels": [],
+    "merged": false,
+    "number": 44103,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix glm_moe_dsa",
+    "updated_at": "2026-02-18T19:38:11Z"
+  },
+  {
+    "additions": 42,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of the meta-issue #43979. **Key changes:** - Added `_can_record_outputs = {\"hidden_states\": IBertLayer,\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44102",
+    "created_at": "2026-02-17T17:21:32Z",
+    "deletions": 154,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44102/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44102",
+    "labels": [],
+    "merged": false,
+    "number": 44102,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor ibert output tracing with capture_outputs",
+    "updated_at": "2026-02-18T21:19:35Z"
+  },
+  {
+    "additions": 210,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR refactors XLM's output tracing to align with the standardized output capturing patterns used across the codebase. ### Key changes: - Refactors transformer blocks into a dedicated `XLMLayer` module to enable\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44101",
+    "created_at": "2026-02-17T17:15:06Z",
+    "deletions": 194,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44101/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44101",
+    "labels": [],
+    "merged": false,
+    "number": 44101,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[XLM] Refactor output tracing to align with capture_outputs standardized architecture",
+    "updated_at": "2026-02-19T08:08:33Z"
+  },
+  {
+    "additions": 3,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "In https://github.com/huggingface/trl/pull/5112 a user reported that `trl sft --help` fails It's because three inherited args from `TrainingArguments` (`torch_empty_cache_steps`, `gradient_checkpointing` and `use_liger_kernel`)help strings\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44100",
+    "created_at": "2026-02-17T17:10:36Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44100/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44100",
+    "labels": [],
+    "merged": true,
+    "number": 44100,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix percentage formatting in help messages for gradient checkpointing, Liger Kernel, and empty cache steps",
+    "updated_at": "2026-02-20T09:57:51Z"
+  },
+  {
+    "additions": 2,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44099",
+    "created_at": "2026-02-17T16:45:35Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44099/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44099",
+    "labels": [],
+    "merged": true,
+    "number": 44099,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Bump dev version",
+    "updated_at": "2026-02-18T10:03:54Z"
+  },
+  {
+    "additions": 125,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR refactors ViLT's output handling to align with the standardized patterns used across the codebase. Key changes: - Removes manual `hidden_states`/`attentions` propagation and passes `output_attentions`, `out\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44098",
+    "created_at": "2026-02-17T16:32:34Z",
+    "deletions": 138,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44098/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44098",
+    "labels": [],
+    "merged": false,
+    "number": 44098,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[ViLT] Refactor output handling to align with standardized patterns",
+    "updated_at": "2026-02-17T16:37:46Z"
+  },
+  {
+    "additions": 12,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The `test_keep_in_fp32_modules` issues in #44052 are because the test assumes a model has **either** `_keep_in_fp32_modules` or `_keep_in_fp32_modules_strict` **but not both.** The only model that uses both is `glm_moe_dsa`, so this is the\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44097",
+    "created_at": "2026-02-17T15:43:55Z",
+    "deletions": 42,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44097/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44097",
+    "labels": [],
+    "merged": true,
+    "number": 44097,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Merge test_keep_in_fp32_modules and test_keep_in_fp32_modules_strict",
+    "updated_at": "2026-02-17T16:23:33Z"
+  },
+  {
+    "additions": 3,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Tests were written (and pass) on DGX A100, here are the values for our runners.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44096",
+    "created_at": "2026-02-17T15:14:26Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44096/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44096",
+    "labels": [],
+    "merged": true,
+    "number": 44096,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[voxtral-realtime] update runner expected values ",
+    "updated_at": "2026-02-17T15:23:19Z"
+  },
+  {
+    "additions": 43,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. The check that was added in https://github.com/huggingface/transformers/pull/43768 is wrong, as a missing weight would NOT be reinitialized in some cases! As for the pointers check, it is actually\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44095",
+    "created_at": "2026-02-17T14:33:22Z",
+    "deletions": 47,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44095/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44095",
+    "labels": [],
+    "merged": true,
+    "number": 44095,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix loading logic issue",
+    "updated_at": "2026-04-20T05:25:17Z"
+  },
+  {
+    "additions": 24,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44094",
+    "created_at": "2026-02-17T14:15:10Z",
+    "deletions": 70,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44094/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44094",
+    "labels": [],
+    "merged": false,
+    "number": 44094,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:07Z"
+  },
+  {
+    "additions": 28,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44093",
+    "created_at": "2026-02-17T14:15:07Z",
+    "deletions": 129,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44093/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44093",
+    "labels": [],
+    "merged": false,
+    "number": 44093,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:09Z"
+  },
+  {
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44092",
+    "created_at": "2026-02-17T14:15:04Z",
+    "deletions": 159,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44092/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44092",
+    "labels": [],
+    "merged": false,
+    "number": 44092,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor output tracing for swinv2 model",
+    "updated_at": "2026-03-03T00:30:10Z"
+  },
+  {
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44091",
+    "created_at": "2026-02-17T14:14:56Z",
+    "deletions": 146,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44091/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44091",
+    "labels": [],
+    "merged": false,
+    "number": 44091,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:11Z"
+  },
+  {
+    "additions": 25,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "bos and eos behaviour should match when updating post processor setting `add_bos_token=True` when `bos_token=None` should silently disables `add_bos_token`. (was already the behavior for `eos_token`)",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44090",
+    "created_at": "2026-02-17T13:15:07Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44090/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44090",
+    "labels": [],
+    "merged": true,
+    "number": 44090,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update post proc",
+    "updated_at": "2026-02-18T15:34:18Z"
+  },
+  {
+    "additions": 113,
+    "author": "preetam1407",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #43979 ## Summary Refactor T5 to the standardized output tracing interface. ## Changes - Added `_can_record_outputs` on T5 encoder/decoder stack subclasses. - Added `@capture_outputs` on the base stack forward. - Added `@can_return_t\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44089",
+    "created_at": "2026-02-17T11:37:18Z",
+    "deletions": 294,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44089/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44089",
+    "labels": [],
+    "merged": false,
+    "number": 44089,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor t5 output tracing",
+    "updated_at": "2026-02-17T13:45:23Z"
+  },
+  {
+    "additions": 41,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description Refactors GPT-2 model to use the standardized `@capture_outputs` and `@can_return_tuple` decorators, replacing manual output collection boilerplate. Part of #43979 ## Changes - **`GPT2PreTrainedModel`**: Added `_can_record_o\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44088",
+    "created_at": "2026-02-17T11:32:42Z",
+    "deletions": 129,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44088/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44088",
+    "labels": [],
+    "merged": false,
+    "number": 44088,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor GPT-2 output tracing with capture_outputs/can_return_tuple",
+    "updated_at": "2026-02-17T11:41:32Z"
+  },
+  {
+    "additions": 16,
+    "author": "huyxdang",
+    "author_association": "NONE",
+    "body_excerpt": "### Summary Refactors the Mamba2 model to use the standardized output collection interface as part of #43979. ### Changes * **Standardized Output Mapping**: Added `_can_record_outputs` to `Mamba2PreTrainedModel` mapping `hidden_states` \u2192 `\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44087",
+    "created_at": "2026-02-17T11:30:25Z",
+    "deletions": 33,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44087/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44087",
+    "labels": [],
+    "merged": false,
+    "number": 44087,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor Mamba2 to use standardized output tracing",
+    "updated_at": "2026-03-11T02:08:22Z"
+  },
+  {
+    "additions": 16,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Migrates **MGP-STR** to the standardized output collection interface using `@capture_outputs` and `@can_return_tuple` decorators. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": MgpstrLayer, \"attentio\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44086",
+    "created_at": "2026-02-17T11:21:22Z",
+    "deletions": 48,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44086/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44086",
+    "labels": [],
+    "merged": false,
+    "number": 44086,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[MGP-STR] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
+    "updated_at": "2026-02-17T11:22:25Z"
+  },
+  {
+    "additions": 37,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Refactors the RemBERT model to use the new output tracing decorators (`@capture_outputs` and `@can_return_tuple`), replacing manual output collection boilerplate. ### Changes: - Added `@capture_outputs` decorator t\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-11",
+    "cluster_ids": [
+      "cluster-43979-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44085",
+    "created_at": "2026-02-17T11:09:55Z",
+    "deletions": 108,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44085/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44085",
+    "labels": [],
+    "merged": false,
+    "number": 44085,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor RemBERT to use output tracing decorators",
+    "updated_at": "2026-02-17T11:10:59Z"
+  },
+  {
+    "additions": 37,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Refactors the GPT-J model to use the new `capture_outputs` and `can_return_tuple` decorators for output tracing, following the same pattern as #44046 (CodeGen). ### Changes: - Added `@capture_outputs` decorator on\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44084",
+    "created_at": "2026-02-17T11:08:48Z",
+    "deletions": 108,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44084/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44084",
+    "labels": [],
+    "merged": false,
+    "number": 44084,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[GPT-J] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
+    "updated_at": "2026-02-17T11:41:38Z"
+  },
+  {
+    "additions": 1555,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "- TODO: - fsdp => faire comme tp en mode fsdp_plan manual qui devient l'auto par d\u00e9faut --- This PR introduces **first-class FSDP2 (Fully Sharded Data Parallel v2) support** directly in Transformers, bypassing the need for Accelerate's FSD\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44083",
+    "created_at": "2026-02-17T10:57:06Z",
+    "deletions": 120,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44083/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44083",
+    "labels": [],
+    "merged": false,
+    "number": 44083,
+    "review_comments_count": 24,
+    "state": "open",
+    "title": "FSDP2 native support in transformers ",
+    "updated_at": "2026-04-14T13:58:30Z"
+  },
+  {
+    "additions": 6,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44077. Indeed, the call is not optional. This is slightly breaking as the defaut used to be False, so fresh model instantiation will now use a different init\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44082",
+    "created_at": "2026-02-17T10:09:03Z",
+    "deletions": 20,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44082/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44082",
+    "labels": [],
+    "merged": true,
+    "number": 44082,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix patchtsmixer call to post_init",
+    "updated_at": "2026-02-17T11:05:40Z"
+  },
+  {
+    "additions": 48,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes #42533 by introducing default flash implementations. cc @vasqu and @cyrilvallez",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44081",
+    "created_at": "2026-02-17T09:54:01Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44081/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44081",
+    "labels": [],
+    "merged": true,
+    "number": 44081,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "add default flash impl",
+    "updated_at": "2026-02-19T11:29:54Z"
+  },
+  {
+    "additions": 22,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None` Fixes #44079, follow-up from #44050. Essentially, it brings behaviour to the expected as described in #44079: > If I 1) initialize a\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44080",
+    "created_at": "2026-02-17T09:53:36Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44080/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44080",
+    "labels": [],
+    "merged": true,
+    "number": 44080,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None`",
+    "updated_at": "2026-02-20T10:08:38Z"
+  },
+  {
+    "additions": 19,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Set `input_modalities` on various architectures that aren't just text Sentence Transformers would like to rely on `input_modalities` in the future to determine what modalities can be used. However, it's not quite\u2026",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44078",
+    "created_at": "2026-02-17T09:15:34Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44078/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44078",
+    "labels": [],
+    "merged": true,
+    "number": 44078,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "[`fix`] Set input_modalities on various architectures that aren't just text",
+    "updated_at": "2026-02-24T10:39:31Z"
+  },
+  {
+    "additions": 11,
+    "author": "mmahjoub5",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR refactors the ImageGPT implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44076",
+    "created_at": "2026-02-17T08:46:55Z",
+    "deletions": 62,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44076/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44076",
+    "labels": [],
+    "merged": false,
+    "number": 44076,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refectored modeling_imagegpt.py to enable hooks to capture_outputs",
+    "updated_at": "2026-02-18T04:11:40Z"
+  },
+  {
+    "additions": 66,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR migrates TextNet to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. It adds `_can_record_outputs`, applies `@capture_outputs` to `TextNetModel.for\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44074",
+    "created_at": "2026-02-17T08:23:25Z",
+    "deletions": 52,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44074/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44074",
+    "labels": [],
+    "merged": false,
+    "number": 44074,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[TextNet] Refactor output tracing using capture_outputs decorator",
+    "updated_at": "2026-02-17T11:28:11Z"
+  },
+  {
+    "additions": 32,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR migrates VisualBert to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. Specifically, this PR: - Adds `_can_record_outputs` to `VisualBertPreTraine\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44073",
+    "created_at": "2026-02-17T08:16:59Z",
+    "deletions": 38,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44073/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44073",
+    "labels": [],
+    "merged": false,
+    "number": 44073,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[VisualBert] Refactor output tracing using capture_outputs and can_return_tuple decorators",
+    "updated_at": "2026-02-17T11:29:01Z"
+  },
+  {
+    "additions": 12,
+    "author": "Siddhartha7340",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# Refactor efficientnet output tracing # What does this PR do? This Pull Request migrates the EfficientNet model to use the standardized @capture_outputs and @can_return_tuple decorators. - Added _can_record_outputs to `EfficientNetPreTrai\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-11",
+    "cluster_ids": [
+      "cluster-43979-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44072",
+    "created_at": "2026-02-17T07:42:01Z",
+    "deletions": 38,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44072/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44072",
+    "labels": [],
+    "merged": false,
+    "number": 44072,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor efficientnet output tracing with @capture_outputs and @can_r\u2026",
+    "updated_at": "2026-02-17T07:56:05Z"
+  },
+  {
+    "additions": 38,
+    "author": "ArivunidhiA",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Refactors the MPT model to use the new standardized output collection interface as part of #43979. ### Changes: - Added `_can_record_outputs` to `MptPreTrainedModel` mapping `hidden_states` \u2192 `MptBlock` and `attent\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44071",
+    "created_at": "2026-02-17T07:19:17Z",
+    "deletions": 112,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44071/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44071",
+    "labels": [],
+    "merged": false,
+    "number": 44071,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[Refactor] Migrate MPT to standardized output tracing decorators",
+    "updated_at": "2026-02-17T07:20:17Z"
+  },
+  {
+    "additions": 272,
+    "author": "rudybear",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary - Add GGUF config mapping, defaults, and tokenizer converter for `qwen3_next` (Qwen3-Coder-Next, hybrid DeltaNet+Attention MoE, 80B total / 3B active) - Add `Qwen3NextTensorProcessor` handling DeltaNet-specific tensor transforms\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44070",
+    "created_at": "2026-02-17T07:18:13Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44070/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44070",
+    "labels": [],
+    "merged": false,
+    "number": 44070,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Add GGUF loading support for Qwen3-Next (qwen3_next) architecture",
+    "updated_at": "2026-04-17T16:39:28Z"
+  },
+  {
+    "additions": 26,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR uses ``torch.isfinite`` to simplify conditions, and the CUDA sync calls may also be reduced.",
+    "changed_files": 26,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44069",
+    "created_at": "2026-02-17T06:49:38Z",
+    "deletions": 48,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44069/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44069",
+    "labels": [],
+    "merged": true,
+    "number": 44069,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Use torch.isfinite",
+    "updated_at": "2026-02-18T01:04:19Z"
+  },
+  {
+    "additions": 42,
+    "author": "mtthw13",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Replaces manual `output_attentions`/`output_hidden_states`/`return_dict` boilerplate in GPT-Neo with the hook-based decorator system. **Changes:** - Added `_can_record_outputs = {\"hidden_states\": GPTNeoBlock, \"attentions\": GPTNeoAttention}\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44018-2",
+    "cluster_ids": [
+      "cluster-44018-2"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44068",
+    "created_at": "2026-02-17T06:13:37Z",
+    "deletions": 119,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44068/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44068",
+    "labels": [],
+    "merged": false,
+    "number": 44068,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor GPT-Neo to use `@capture_outputs` and `@can_return_tuple` decorators",
+    "updated_at": "2026-02-18T08:30:32Z"
+  },
+  {
+    "additions": 63,
+    "author": "23atharvaS",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR introduces a new argument `eval_on_end` to the `Trainer` class. When enabled, the Trainer automatically runs evaluation at the end of training. This allows users to obtain final evaluation metrics without e\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44067",
+    "created_at": "2026-02-17T05:25:26Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44067/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44067",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44067,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add `eval_on_end` argument to Trainer for final evaluation after training",
+    "updated_at": "2026-02-17T13:32:34Z"
+  },
+  {
+    "additions": 35,
+    "author": "Jay-IIT",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Migrate GPT-J from manual boilerplate output collection to the new decorator-based output tracing system: - Add `_can_record_outputs` to `GPTJPreTrainedModel` - Add `@capture_outputs` and `@merge_with_config_defaults` to `GPTJModel.forward\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-11",
+    "cluster_ids": [
+      "cluster-43979-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44066",
+    "created_at": "2026-02-17T05:12:11Z",
+    "deletions": 107,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44066/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44066",
+    "labels": [],
+    "merged": false,
+    "number": 44066,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor GPT-J to use standardized output tracing (#43979)",
+    "updated_at": "2026-02-18T18:44:28Z"
+  },
+  {
+    "additions": 21,
+    "author": "tysoncung",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Refactors the CTRL model to use the standardized output collection interface as part of #43979. ## Changes - Added `_can_record_outputs` to `CTRLPreTrainedModel` mapping `hidden_states` \u2192 `EncoderLayer` and `attentions` \u2192 `Multi\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44065",
+    "created_at": "2026-02-17T02:03:57Z",
+    "deletions": 76,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44065/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44065",
+    "labels": [],
+    "merged": false,
+    "number": 44065,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor CTRL model output tracing with capture_outputs and can_return_tuple",
+    "updated_at": "2026-02-25T00:49:18Z"
+  },
+  {
+    "additions": 57,
+    "author": "mariam851",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to trigger a final evaluation automatically after training finishes. Key Changes: TrainingArguments: Added eval_on_end boolean flag. Trainer.train: Logic to call evaluate() and merge metri\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44064",
+    "created_at": "2026-02-17T01:10:31Z",
+    "deletions": 16,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44064/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44064",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44064,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "feat: implement eval_on_end to trigger evaluation after training",
+    "updated_at": "2026-02-17T13:32:40Z"
+  },
+  {
+    "additions": 229,
+    "author": "AutumnAurelium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This brings the Arcee AFMoE architecture in line with other MoE models' implementation patterns since v5. It also adds integration testing using Trinity Nano. ## Before submitting - [ ] This PR fixes a typo or impro\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44063",
+    "created_at": "2026-02-17T01:07:13Z",
+    "deletions": 150,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44063/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44063",
+    "labels": [],
+    "merged": true,
+    "number": 44063,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "Update AFMoE architecture to use v5-style MoE impl",
+    "updated_at": "2026-03-19T14:00:46Z"
+  },
+  {
+    "additions": 2,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "Reproduced locally with ``` pytest -q -m generate --random-order-bucket=none --flake-finder --flake-runs=200 tests/models/kosmos2/test_modeling_kosmos2.py -k test_assisted_decoding_matches_greedy_search ``` Root cause: - prepare_config_and\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44061",
+    "created_at": "2026-02-16T22:08:48Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44061/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44061",
+    "labels": [],
+    "merged": true,
+    "number": 44061,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: flaky `Kosmos2ModelTest` test",
+    "updated_at": "2026-02-18T14:23:30Z"
+  },
+  {
+    "additions": 44,
+    "author": "lakprigan",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "## Summary Migrates GPT2 to the standardized output collection interface as part of #43979. - Added `_can_record_outputs` to `GPT2PreTrainedModel` (including `cross_attentions` via `OutputRecorder` targeting the `crossattention` submodule)\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44059",
+    "created_at": "2026-02-16T20:14:30Z",
+    "deletions": 133,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44059/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44059",
+    "labels": [],
+    "merged": false,
+    "number": 44059,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[GPT2] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
+    "updated_at": "2026-02-25T17:47:45Z"
+  },
+  {
+    "additions": 122,
+    "author": "engmohamedsalah",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44052 \u2014 resolves 10 of 11 skipped tests for the `glm_moe_dsa` model. **Root causes fixed:** - **DSA indexer mask shape mismatch**: The attention mask was not properly normalized to 4D before being passed to the indexer an\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44058",
+    "created_at": "2026-02-16T19:24:30Z",
+    "deletions": 84,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44058/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44058",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44058,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix skipped tests for glm_moe_dsa model",
+    "updated_at": "2026-02-17T17:23:03Z"
+  },
+  {
+    "additions": 0,
+    "author": "mariam851",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR addresses memory efficiency issues in the Qwen2Moe implementation (reported in #43856). Users experienced Out-of-Memory (OOM) errors during quantization and inference, particularly with large reserved memory (e.g., 27GB on H100) th\u2026",
+    "changed_files": 0,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44057",
+    "created_at": "2026-02-16T18:35:01Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44057/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44057",
+    "labels": [],
+    "merged": false,
+    "number": 44057,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(qwen3_moe): optimize memory and fix OOM in MoE layers",
+    "updated_at": "2026-02-16T21:47:41Z"
+  },
+  {
+    "additions": 50,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR migrates MPNet to the new standardized output tracing system using the `@capture_outputs` decorator. Specifically, this PR: - Applies `@capture_outputs` to `MPNetModel.forward` - Removes manual accumulation\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44056",
+    "created_at": "2026-02-16T18:27:05Z",
+    "deletions": 14,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44056/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44056",
+    "labels": [],
+    "merged": false,
+    "number": 44056,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[MPNet] Refactor output tracing using capture_outputs decorator",
+    "updated_at": "2026-02-17T11:23:12Z"
+  },
+  {
+    "additions": 5,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44055",
+    "created_at": "2026-02-16T18:26:43Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44055/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44055",
+    "labels": [],
+    "merged": true,
+    "number": 44055,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix unprotected torch import",
+    "updated_at": "2026-02-16T18:43:01Z"
+  },
+  {
+    "additions": 346,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add flash MLA interface. - It does not work I get a segfault - we don't leverage the paged cache so it's not as efficient as that I reckon. ```bash Fetching 6 files: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2026",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44054",
+    "created_at": "2026-02-16T18:07:14Z",
+    "deletions": 93,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44054/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44054",
+    "labels": [],
+    "merged": false,
+    "number": 44054,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Flash mla interface",
+    "updated_at": "2026-02-20T11:14:39Z"
+  },
+  {
+    "additions": 2,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44053",
+    "created_at": "2026-02-16T17:59:48Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44053/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44053",
+    "labels": [],
+    "merged": true,
+    "number": 44053,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix peft conversion typo",
+    "updated_at": "2026-02-17T11:12:19Z"
+  },
+  {
+    "additions": 2,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Fix incorrect forward type hint for Gemma3n ## Details The type hint didn't match the actual returned class: https://github.com/huggingface/transformers/blob/349e00c1a367ce263624e525038250625dcf20c7/src/transforme\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44051",
+    "created_at": "2026-02-16T17:26:24Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44051/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44051",
+    "labels": [],
+    "merged": true,
+    "number": 44051,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`chore`] Fix incorrect forward type hint for Gemma3n",
+    "updated_at": "2026-02-20T09:08:07Z"
+  },
+  {
+    "additions": 15,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Patch `get_text_features` for ChineseCLIP ### Details The `get_text_features` assumes that the `text_model` returns a `BaseModelOutputWithPooling`, just like is done with many other models. Currently, the `get_tex\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44050",
+    "created_at": "2026-02-16T17:23:31Z",
+    "deletions": 19,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44050/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44050",
+    "labels": [],
+    "merged": false,
+    "number": 44050,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "[`fix`] Patch `get_text_features` for ChineseCLIP",
+    "updated_at": "2026-02-17T09:55:17Z"
+  },
+  {
+    "additions": 59,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `fnet` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of #43979. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": FNetLayer}` to `FNetPreTrainedModel`\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44049",
+    "created_at": "2026-02-16T17:19:04Z",
+    "deletions": 112,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44049/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44049",
+    "labels": [],
+    "merged": false,
+    "number": 44049,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor fnet model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:13Z"
+  },
+  {
+    "additions": 4,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Fix up `__repr__` whitespace/brackets ## Reproducer ```python from transformers import AutoTokenizer, PreTrainedTokenizerBase # __repr__ via PreTrainedTokenizerBase tokenizer = AutoTokenizer.from_pretrained(\"bert-\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44048",
+    "created_at": "2026-02-16T17:18:10Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44048/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44048",
+    "labels": [],
+    "merged": true,
+    "number": 44048,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`simple`] Fix up `__repr__` whitespace/brackets",
+    "updated_at": "2026-02-20T10:03:34Z"
+  },
+  {
+    "additions": 35,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `bloom` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of the effort in #43979. ### Changes: - Add `_can_record_outputs` dict to `BloomPreTrainedModel` mapping `hi\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44047",
+    "created_at": "2026-02-16T17:15:25Z",
+    "deletions": 104,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44047/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44047",
+    "labels": [],
+    "merged": false,
+    "number": 44047,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor bloom model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:14Z"
+  },
+  {
+    "additions": 24,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44046",
+    "created_at": "2026-02-16T17:07:38Z",
+    "deletions": 70,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44046/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44046",
+    "labels": [],
+    "merged": false,
+    "number": 44046,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-02-17T14:15:23Z"
+  },
+  {
+    "additions": 456215,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 4939,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44045",
+    "created_at": "2026-02-16T17:01:41Z",
+    "deletions": 591028,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44045/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44045",
+    "labels": [],
+    "merged": false,
+    "number": 44045,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Flash-mla-interface",
+    "updated_at": "2026-02-16T17:11:51Z"
+  },
+  {
+    "additions": 49,
+    "author": "rwtarpit",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-11",
+    "cluster_ids": [
+      "cluster-43979-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44044",
+    "created_at": "2026-02-16T16:43:19Z",
+    "deletions": 112,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44044/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44044",
+    "labels": [],
+    "merged": false,
+    "number": 44044,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor DeBERTa's output tracing interface",
+    "updated_at": "2026-02-16T18:57:29Z"
+  },
+  {
+    "additions": 170,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 31,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44043",
+    "created_at": "2026-02-16T16:23:57Z",
+    "deletions": 162,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44043/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44043",
+    "labels": [],
+    "merged": true,
+    "number": 44043,
+    "review_comments_count": 15,
+    "state": "closed",
+    "title": "`grouped_mm` fallback",
+    "updated_at": "2026-02-23T13:58:09Z"
+  },
+  {
+    "additions": 1,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Joao is regrettably no longer with us :saluting_face: so we should really stop getting users to ping him! This PR makes @cyrilvallez responsible for `generate` issues outside of VLMs.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44042",
+    "created_at": "2026-02-16T16:00:36Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44042/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44042",
+    "labels": [],
+    "merged": true,
+    "number": 44042,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update assignee for generate in bug report template",
+    "updated_at": "2026-02-16T16:09:19Z"
+  },
+  {
+    "additions": 469,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? Alternate PR to #43985 to be a reorder only PR. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, s\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44041",
+    "created_at": "2026-02-16T15:40:41Z",
+    "deletions": 457,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44041/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44041",
+    "labels": [],
+    "merged": true,
+    "number": 44041,
+    "review_comments_count": 14,
+    "state": "closed",
+    "title": "refactor _inner_training_loop to smaller methods",
+    "updated_at": "2026-02-23T16:52:09Z"
+  },
+  {
+    "additions": 366,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44008 and re-enables tests",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44040",
+    "created_at": "2026-02-16T12:43:28Z",
+    "deletions": 230,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44040/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44040",
+    "labels": [],
+    "merged": true,
+    "number": 44040,
+    "review_comments_count": 14,
+    "state": "closed",
+    "title": "Fix gemma3n `get_audio_features`",
+    "updated_at": "2026-02-19T12:50:00Z"
+  },
+  {
+    "additions": 47,
+    "author": "itzyesse99-lgtm",
+    "author_association": "NONE",
+    "body_excerpt": "```diff diff --git a/transformers/modeling_utils.py b/transformers/modeling_utils.py index 1234567..8901234 100644 --- a/transformers/modeling_utils.py +++ b/transformers/modeling_utils.py @@ -10,6 +10,7 @@ from transformers import PreTrai\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44039",
+    "created_at": "2026-02-16T12:01:26Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44039/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44039",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44039,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "AI Fix for #43979",
+    "updated_at": "2026-03-14T12:34:32Z"
+  },
+  {
+    "additions": 23,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/43913",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44037",
+    "created_at": "2026-02-16T11:02:12Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44037/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44037",
+    "labels": [],
+    "merged": true,
+    "number": 44037,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Add a dim check mechanism in Transpose and fix qwen3_vl_moe weight mapping",
+    "updated_at": "2026-02-16T16:01:12Z"
+  },
+  {
+    "additions": 0,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? So the following logic added in a previous PR #44033 could take effect ```python # `include_all` is `True` when the CI is running on a pull request, so it treats all failing tests # in the current CI run as \"new fai\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44036",
+    "created_at": "2026-02-16T10:14:54Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44036/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44036",
+    "labels": [],
+    "merged": true,
+    "number": 44036,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Remove `other_workflow_run_ids` for `issue_comment` in `utils/notification_service.py`",
+    "updated_at": "2026-02-16T10:24:07Z"
+  },
+  {
+    "additions": 25,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? We already brought it back with: ```python if clean_up_tokenization_spaces: # Call custom cleanup method if it exists (e.g., for CLVP's [SPACE] token replacement) if hasattr(self, \"clean_up_tokenization\") and callab\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44035",
+    "created_at": "2026-02-16T09:49:28Z",
+    "deletions": 112,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44035/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44035",
+    "labels": [],
+    "merged": true,
+    "number": 44035,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "bring back our demons: clean_up_tokenization_spaces",
+    "updated_at": "2026-02-20T14:50:18Z"
+  },
+  {
+    "additions": 18,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44034",
+    "created_at": "2026-02-16T08:04:20Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44034/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44034",
+    "labels": [],
+    "merged": false,
+    "number": 44034,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "don't merge check workflow",
+    "updated_at": "2026-02-16T10:52:50Z"
+  },
+  {
+    "additions": 143,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Improve new failing test analysis for PR comment CI",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44033",
+    "created_at": "2026-02-16T07:30:33Z",
+    "deletions": 49,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44033/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44033",
+    "labels": [],
+    "merged": true,
+    "number": 44033,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Improve new failing test analysis for PR comment CI",
+    "updated_at": "2026-02-16T08:02:16Z"
+  },
+  {
+    "additions": 3,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Update FP8 expert replacement to use `model.config.text_config` when available (VLMs), falling back to model.config if it's text-only models.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44032",
+    "created_at": "2026-02-16T06:02:28Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44032/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44032",
+    "labels": [],
+    "merged": true,
+    "number": 44032,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[Misc][vlms] Use text_config when initializing the fine-grained FP8Expert",
+    "updated_at": "2026-02-19T10:28:31Z"
+  },
+  {
+    "additions": 11,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`dpr` model as per #43979 cc @molbap <img width=\"853\" height=\"323\" alt=\"Screenshot 2026-02-16 at 9 13 30 AM\" src=\"https://github.com/user-attachments/assets/d658f1d0-75e8-4eac-8a12-9aeddf194dde\" />",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44030",
+    "created_at": "2026-02-16T03:44:19Z",
+    "deletions": 58,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44030/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44030",
+    "labels": [],
+    "merged": false,
+    "number": 44030,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "refactor output tracing in `dpr`",
+    "updated_at": "2026-02-17T07:46:00Z"
+  },
+  {
+    "additions": 21,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`rwkv` model as per #43979 cc @molbap <img width=\"856\" height=\"333\" alt=\"Screenshot 2026-02-16 at 9 06 34 AM\" src=\"https://github.com/user-attachments/assets/9c8c5d41-ffbd-45f6-8b9b-1429bcb14543\" />",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44029",
+    "created_at": "2026-02-16T03:37:13Z",
+    "deletions": 55,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44029/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44029",
+    "labels": [],
+    "merged": false,
+    "number": 44029,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `rwkv`",
+    "updated_at": "2026-02-17T07:47:02Z"
+  },
+  {
+    "additions": 13,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`superpoint` model as per #43979 cc @molbap <img width=\"857\" height=\"334\" alt=\"Screenshot 2026-02-16 at 8 53 43 AM\" src=\"https://github.com/user-attachments/assets/17781b76-743b-4b38-923a-8db3b94ccd01\" />",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44028",
+    "created_at": "2026-02-16T03:25:14Z",
+    "deletions": 46,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44028/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44028",
+    "labels": [],
+    "merged": false,
+    "number": 44028,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing for `superpoint`",
+    "updated_at": "2026-02-17T07:46:06Z"
+  },
+  {
+    "additions": 6,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `speech_encoder_decoder` model as per #43979 cc @molbap <img width=\"852\" height=\"335\" alt=\"Screenshot 2026-02-16 at 8 44 05 AM\" src=\"https://github.com/user-attachments/assets/ee25c72b-b995-403c-b47b-3e9cbae0d2cc\" />",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44027",
+    "created_at": "2026-02-16T03:14:41Z",
+    "deletions": 22,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44027/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44027",
+    "labels": [],
+    "merged": false,
+    "number": 44027,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `speech_encoder_decoder`",
+    "updated_at": "2026-02-17T09:04:35Z"
+  },
+  {
+    "additions": 12,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`vision_encoder_decoder` model as per #43979 cc @molbap <img width=\"849\" height=\"333\" alt=\"Screenshot 2026-02-16 at 8 28 20 AM\" src=\"https://github.com/user-attachments/assets/9f511a17-947b-46ed-82a8-8bb9bb103f15\" />",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44026",
+    "created_at": "2026-02-16T02:59:14Z",
+    "deletions": 22,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44026/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44026",
+    "labels": [],
+    "merged": false,
+    "number": 44026,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing for `vision_encoder_decoder`",
+    "updated_at": "2026-02-17T09:05:22Z"
+  },
+  {
+    "additions": 7,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `depth_anything` model as per #43979 cc @molbap <img width=\"840\" height=\"330\" alt=\"Screenshot 2026-02-16 at 8 25 01 AM\" src=\"https://github.com/user-attachments/assets/fe7770be-70cb-4343-accb-7407c6bbb4f8\" />",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44025",
+    "created_at": "2026-02-16T02:56:17Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44025/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44025",
+    "labels": [],
+    "merged": false,
+    "number": 44025,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing for `depth_anything`",
+    "updated_at": "2026-02-17T07:46:31Z"
+  },
+  {
+    "additions": 15,
+    "author": "mmahjoub5",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR refactors the FocalNet implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44024",
+    "created_at": "2026-02-15T23:48:12Z",
+    "deletions": 60,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44024/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44024",
+    "labels": [],
+    "merged": false,
+    "number": 44024,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Focalnet standardized outputs",
+    "updated_at": "2026-02-17T08:47:48Z"
+  },
+  {
+    "additions": 32,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the Nystromformer model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Add `_can_record_outputs` on `Nystromform\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44023",
+    "created_at": "2026-02-15T21:53:48Z",
+    "deletions": 122,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44023/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44023",
+    "labels": [],
+    "merged": false,
+    "number": 44023,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor Nystromformer output tracing with @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:15Z"
+  },
+  {
+    "additions": 57,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the ConvBERT model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Move `ConvBertPreTrainedModel` after layer def\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44022",
+    "created_at": "2026-02-15T21:49:57Z",
+    "deletions": 152,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44022/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44022",
+    "labels": [],
+    "merged": false,
+    "number": 44022,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor ConvBERT output tracing with @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:17Z"
+  },
+  {
+    "additions": 22,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #43906 (related to #38071) ### Problem When using `pipeline('text-generation')` with batched inference on Qwen3 (and other models where `pad_token_id == bos_token_id`), a spurious warning is emitted: > A deco\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44021",
+    "created_at": "2026-02-15T21:45:58Z",
+    "deletions": 13,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44021/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44021",
+    "labels": [],
+    "merged": true,
+    "number": 44021,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix false positive right-padding warning for decoder-only models in pipeline",
+    "updated_at": "2026-02-17T10:41:32Z"
+  },
+  {
+    "additions": 28,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44020",
+    "created_at": "2026-02-15T21:39:17Z",
+    "deletions": 129,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44020/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44020",
+    "labels": [],
+    "merged": false,
+    "number": 44020,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-02-17T14:15:21Z"
+  },
+  {
+    "additions": 17,
+    "author": "Sid-V5",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Refactored the `resnet` model to use the standardized output tracing decorators (`@capture_outputs` and `@can_return_tuple`) as part of the migration ### Changes | File | Change | |------|--------| | `modeling_resnet.py` | Migrated to `@ca\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44019",
+    "created_at": "2026-02-15T19:53:19Z",
+    "deletions": 62,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44019/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44019",
+    "labels": [],
+    "merged": false,
+    "number": 44019,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor `resnet` to use `@capture_outputs` / `@can_return_tuple` output tracing",
+    "updated_at": "2026-02-15T20:01:23Z"
+  },
+  {
+    "additions": 41,
+    "author": "yashbora9",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "## Summary - Migrates `gpt_neo` to the standardized output collection interface as part of #43979 - Adds `@capture_outputs` decorator on `GPTNeoModel.forward` (base model) - Adds `@can_return_tuple` decorator on all wrapper model forwards\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44018-2",
+    "cluster_ids": [
+      "cluster-44018-2"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44018",
+    "created_at": "2026-02-15T19:35:06Z",
+    "deletions": 109,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44018/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44018",
+    "labels": [],
+    "merged": false,
+    "number": 44018,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor GPT-Neo output tracing to use capture_outputs/can_return_tuple",
+    "updated_at": "2026-02-16T20:33:37Z"
+  },
+  {
+    "additions": 13,
+    "author": "nexiouscaliver",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR refactors \\`SegformersPreTrainedModel\\` and \\`SegformersForImageClassification\\` to use standardized \\`@capture_outputs\\` and \\`@can_return_tuple\\` decorators for automatic output collection. ### Changes 1. **Imported \\`@capture_ou\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44017",
+    "created_at": "2026-02-15T19:27:22Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44017/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44017",
+    "labels": [],
+    "merged": false,
+    "number": 44017,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor output tracing in segformers (#43979)",
+    "updated_at": "2026-02-20T16:51:42Z"
+  },
+  {
+    "additions": 95,
+    "author": "akashadsare",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR migrates GPT-2 and its derivatives (GPTBigCode, Decision Transformer) to the new standardized output collection interface using the [@capture_outputs](vscode-file://vscode-app/usr/share/code/resources/app/out/vs/code/electron-brows\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44015",
+    "created_at": "2026-02-15T18:07:11Z",
+    "deletions": 231,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44015/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44015",
+    "labels": [],
+    "merged": false,
+    "number": 44015,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "Refactor GPT2-based models to standardized output collection interface",
+    "updated_at": "2026-02-15T18:13:56Z"
+  },
+  {
+    "additions": 45,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #43992 by preventing a false missing-key report for `UMT5EncoderModel` when `encoder.embed_tokens.weight` is tied to `shared.weight`. `UMT5EncoderModel` already declares tied weights, but loading checkpoints that only carr\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44014",
+    "created_at": "2026-02-15T15:17:22Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44014/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44014",
+    "labels": [],
+    "merged": false,
+    "number": 44014,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[UMT5] Ignore tied encoder embedding missing-key warning",
+    "updated_at": "2026-02-16T13:40:21Z"
+  },
+  {
+    "additions": 10,
+    "author": "gabrielfruet",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-11",
+    "cluster_ids": [
+      "cluster-43979-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44013",
+    "created_at": "2026-02-15T13:49:53Z",
+    "deletions": 43,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44013/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44013",
+    "labels": [],
+    "merged": false,
+    "number": 44013,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Ouptut tracing: Standardizing MobileNetv2",
+    "updated_at": "2026-02-15T13:50:59Z"
+  },
+  {
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44012",
+    "created_at": "2026-02-15T11:20:17Z",
+    "deletions": 159,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44012/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44012",
+    "labels": [],
+    "merged": false,
+    "number": 44012,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor output tracing for swinv2 model",
+    "updated_at": "2026-02-17T14:15:19Z"
+  },
+  {
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44011",
+    "created_at": "2026-02-15T11:11:02Z",
+    "deletions": 146,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44011/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44011",
+    "labels": [],
+    "merged": false,
+    "number": 44011,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-02-17T14:15:17Z"
+  },
+  {
+    "additions": 41,
+    "author": "preetam1407",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "#43979. Refactors SqueezeBert to the standardized output collection interface: - Adds `_can_record_outputs` in `SqueezeBertPreTrainedModel` - Adds `@capture_outputs` on `SqueezeBertModel.forward` - Adds `@can_return_tuple` on task model fo\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44010",
+    "created_at": "2026-02-15T09:40:09Z",
+    "deletions": 139,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44010/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44010",
+    "labels": [],
+    "merged": false,
+    "number": 44010,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "[SqueezeBert] Migrate to standardized output collection decorators",
+    "updated_at": "2026-04-13T08:20:09Z"
+  },
+  {
+    "additions": 1,
+    "author": "mariam851",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #43976 Updated the documentation to reflect the actual Python requirement (3.10+) as defined in setup.py. Changes: Updated README.md .",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44009",
+    "created_at": "2026-02-15T08:51:26Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44009/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44009",
+    "labels": [],
+    "merged": true,
+    "number": 44009,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "update python requirement to 3.10+ to match codebase",
+    "updated_at": "2026-02-16T13:46:56Z"
+  },
+  {
+    "additions": 26,
+    "author": "pdwi2020",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary - refactor `ResNetModel` to use `@capture_outputs` for hidden-state collection - register `_can_record_outputs` on `ResNetPreTrainedModel` with `ResNetStage` - switch `ResNetForImageClassification` and `ResNetBackbone` to `@can_\u2026",
+    "changed_files": 3,
+    "cluster_id": "cluster-43979-11",
+    "cluster_ids": [
+      "cluster-43979-11"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44007",
+    "created_at": "2026-02-15T07:26:52Z",
+    "deletions": 58,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44007/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44007",
+    "labels": [],
+    "merged": false,
+    "number": 44007,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[ResNet] Refactor output tracing to decorator-based interface",
+    "updated_at": "2026-02-19T15:49:49Z"
+  },
+  {
+    "additions": 8,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR uses torch.xlogy for better numerical handling.",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44006",
+    "created_at": "2026-02-15T04:07:50Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44006/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44006",
+    "labels": [],
+    "merged": true,
+    "number": 44006,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Use torch.xlogy ",
+    "updated_at": "2026-02-17T00:42:54Z"
+  },
+  {
+    "additions": 224,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR transfers grid_thw to a python list at the beginning of some functions to reduce later CUDA sync calls. Therefore, several sync calls are merged into one call.",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44005",
+    "created_at": "2026-02-15T02:34:55Z",
+    "deletions": 254,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44005/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44005",
+    "labels": [],
+    "merged": true,
+    "number": 44005,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Reduce reduce CUDA sync",
+    "updated_at": "2026-02-17T01:00:52Z"
+  },
+  {
+    "additions": 21,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `codegen` model as per #43979 cc @molbap <img width=\"843\" height=\"445\" alt=\"Screenshot 2026-02-15 at 5 24 52 AM\" src=\"https://github.com/user-attachments/assets/d5aeb711-96a7-4fd8-af7b-0aeac23eeeb1\" /> 2 tests are bei\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44004",
+    "created_at": "2026-02-14T23:56:18Z",
+    "deletions": 62,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44004/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44004",
+    "labels": [],
+    "merged": false,
+    "number": 44004,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing for `codegen`",
+    "updated_at": "2026-02-17T08:56:07Z"
+  },
+  {
+    "additions": 37,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `mamba` model as per #43979 cc @molbap <img width=\"859\" height=\"427\" alt=\"Screenshot 2026-02-15 at 5 12 43 AM\" src=\"https://github.com/user-attachments/assets/f23bb675-a9a3-4e21-a6c5-9804910301b4\" /> Note - Only 46 te\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44003",
+    "created_at": "2026-02-14T23:46:10Z",
+    "deletions": 68,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44003/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44003",
+    "labels": [],
+    "merged": false,
+    "number": 44003,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `mamba`",
+    "updated_at": "2026-02-17T07:40:50Z"
+  },
+  {
+    "additions": 7,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `upernet` model as per #43979 cc @molbap <img width=\"856\" height=\"457\" alt=\"Screenshot 2026-02-15 at 4 51 03 AM\" src=\"https://github.com/user-attachments/assets/5dc478d7-d708-4296-a86b-c3bb252d0325\" />",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44002",
+    "created_at": "2026-02-14T23:21:45Z",
+    "deletions": 20,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44002/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44002",
+    "labels": [],
+    "merged": false,
+    "number": 44002,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `upernet`",
+    "updated_at": "2026-02-17T08:55:16Z"
+  },
+  {
+    "additions": 3,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`univnet` model as per #43979 cc @molbap <img width=\"848\" height=\"462\" alt=\"Screenshot 2026-02-15 at 4 19 00 AM\" src=\"https://github.com/user-attachments/assets/75848429-b9ff-49b3-a028-645aa67fc2ad\" />",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44001",
+    "created_at": "2026-02-14T22:50:39Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44001/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44001",
+    "labels": [],
+    "merged": false,
+    "number": 44001,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `univnet`",
+    "updated_at": "2026-02-14T23:22:13Z"
+  },
+  {
+    "additions": 8,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `vision_text_dual_encoder` model issue as per #43979 cc @molbap <img width=\"876\" height=\"292\" alt=\"Screenshot 2026-02-15 at 4 09 07 AM\" src=\"https://github.com/user-attachments/assets/11147a56-993b-4abc-b07a-ec739a53d\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44000",
+    "created_at": "2026-02-14T22:44:14Z",
+    "deletions": 21,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44000/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44000",
+    "labels": [],
+    "merged": false,
+    "number": 44000,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `vision_text_dual_encoder`",
+    "updated_at": "2026-02-17T07:46:33Z"
+  },
+  {
+    "additions": 10,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `mobilenet_v1` model as per #43979 cc @molbap",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43999",
+    "created_at": "2026-02-14T22:20:19Z",
+    "deletions": 30,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43999/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43999",
+    "labels": [],
+    "merged": false,
+    "number": 43999,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `mobilenet_v1`",
+    "updated_at": "2026-02-17T07:52:08Z"
+  },
+  {
+    "additions": 8,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `timm_backbone` model issue as per #43979 cc @molbap <img width=\"856\" height=\"423\" alt=\"Screenshot 2026-02-15 at 4 10 15 AM\" src=\"https://github.com/user-attachments/assets/26237c3e-7b66-4f0d-a8b5-ffad6ee7c673\" />",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43998",
+    "created_at": "2026-02-14T22:12:30Z",
+    "deletions": 19,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43998/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43998",
+    "labels": [],
+    "merged": false,
+    "number": 43998,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `timm_backbone`",
+    "updated_at": "2026-02-21T07:29:47Z"
+  },
+  {
+    "additions": 12,
+    "author": "karthiksuki",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? This PR migrates the **RegNet** model to the standardized output collection interface as part of the ongoing refactoring effort in issue #43979. Specifically: - Adds the `_can_record_outputs` dictionary to `RegNetPr\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43997",
+    "created_at": "2026-02-14T19:57:54Z",
+    "deletions": 45,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43997/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43997",
+    "labels": [],
+    "merged": false,
+    "number": 43997,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Migrate RegNet to standardized output tracing",
+    "updated_at": "2026-02-14T20:10:22Z"
+  },
+  {
+    "additions": 44,
+    "author": "beelapranay",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors FNet and CVT output tracing to use the standardized decorators where appropriate. 1. FNet now uses @capture_outputs with _can_record_outputs to collect hidden states. 2. CVT keeps manual hidden-state colle\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-11",
+    "cluster_ids": [
+      "cluster-43979-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43996",
+    "created_at": "2026-02-14T17:55:31Z",
+    "deletions": 134,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43996/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43996",
+    "labels": [],
+    "merged": false,
+    "number": 43996,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor FNet and CVT output tracing",
+    "updated_at": "2026-02-14T18:10:17Z"
+  },
+  {
+    "additions": 21,
+    "author": "akeemlh",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors falcon in partial fulfillment of https://github.com/huggingface/transformers/issues/43979 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's th\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43995",
+    "created_at": "2026-02-14T14:39:58Z",
+    "deletions": 87,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43995/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43995",
+    "labels": [],
+    "merged": false,
+    "number": 43995,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactoring falcon model to match standardized output collection interface",
+    "updated_at": "2026-02-14T14:41:00Z"
+  },
+  {
+    "additions": 12,
+    "author": "saurav0369",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### Docs: Fix Typos and Standardize Naming This PR fixes various typos, duplicate words, and capitalization inconsistencies across the documentation to improve readability and ensure professional branding. | File | Changes Made | | :--- |\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43993",
+    "created_at": "2026-02-14T10:11:40Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43993/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43993",
+    "labels": [],
+    "merged": true,
+    "number": 43993,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: fix typos across documentation files",
+    "updated_at": "2026-02-16T13:41:41Z"
+  },
+  {
+    "additions": 3,
+    "author": "taovinci0",
+    "author_association": "NONE",
+    "body_excerpt": "Replaces mutable default dict `weights={}` with `weights=None` and initializes inside the function. The dict is mutated via `weights[full_key] = w`, which can cause unexpected behavior across multiple calls.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43991",
+    "created_at": "2026-02-14T00:00:00Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43991/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43991",
+    "labels": [],
+    "merged": false,
+    "number": 43991,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: replace mutable default argument in _read_h5_weights",
+    "updated_at": "2026-02-16T11:18:06Z"
+  },
+  {
+    "additions": 10,
+    "author": "Abhijeetsingh610",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a crash in `AutoVideoProcessor` when `torchvision` is unavailable. `VIDEO_PROCESSOR_MAPPING_NAMES` can contain `None`, and `video_processor_class_from_name` was doing `if class_name in extractors`, which rais\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43989",
+    "created_at": "2026-02-13T20:48:03Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43989/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43989",
+    "labels": [],
+    "merged": false,
+    "number": 43989,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix AutoVideoProcessor class lookup when torchvision is unavailable",
+    "updated_at": "2026-02-18T17:52:34Z"
+  },
+  {
+    "additions": 7,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **LayoutXLM:** [This PR (rm slow tokenizers)](https://github.com/huggingface/transformers/pull/40936) changed [models/auto/tokenization_auto.py](\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43988",
+    "created_at": "2026-02-13T20:03:28Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43988/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43988",
+    "labels": [],
+    "merged": true,
+    "number": 43988,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(testing): Fix LayoutXLM tokenization test and LightOnOCR SDPA flash test failures on main CI",
+    "updated_at": "2026-04-18T08:32:43Z"
+  },
+  {
+    "additions": 47,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? Accelerator has a lot of other args that can be passed to it like fp8 support, etc, but requires extensive monkey patching downstream to make it work. This makes it easier to extend the accelerator args building met\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43987",
+    "created_at": "2026-02-13T18:51:56Z",
+    "deletions": 38,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43987/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43987",
+    "labels": [],
+    "merged": true,
+    "number": 43987,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "split out accelerator args builder method",
+    "updated_at": "2026-02-16T14:59:03Z"
+  },
+  {
+    "additions": 1828,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? The `_inner_training_loop` method has a lot going on which makes it hard to extend for downstream developers/libraries. This PR breaks it up into smaller well described methods that are chained in the training loop.\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43985",
+    "created_at": "2026-02-13T17:55:01Z",
+    "deletions": 251,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43985/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43985",
+    "labels": [],
+    "merged": false,
+    "number": 43985,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor inner training loop",
+    "updated_at": "2026-03-09T19:57:50Z"
+  },
+  {
+    "additions": 2,
+    "author": "materight",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Removes unused `.squeeze` from VJEPA2 embeddings rotation. Currently the squeeze does nothing on video input since torch skips it if the dimension is not 1. Exporting to onnx and compiling to TensorRT instead fails\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43984",
+    "created_at": "2026-02-13T17:53:16Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43984/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43984",
+    "labels": [],
+    "merged": true,
+    "number": 43984,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Remove unused squeeze from VJEPA2 embeddings rotation",
+    "updated_at": "2026-02-13T21:56:01Z"
+  },
+  {
+    "additions": 62,
+    "author": "Aki-07",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43983",
+    "created_at": "2026-02-13T17:52:45Z",
+    "deletions": 188,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43983/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43983",
+    "labels": [],
+    "merged": true,
+    "number": 43983,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Migrate GPT2 to standardized output capture decorators",
+    "updated_at": "2026-02-18T10:40:51Z"
+  },
+  {
+    "additions": 1,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR create a `.git-blame-ignore-revs` file to ignore the following commit https://github.com/huggingface/transformers/pull/43914 when using git blame.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43982",
+    "created_at": "2026-02-13T17:13:41Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43982/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43982",
+    "labels": [],
+    "merged": true,
+    "number": 43982,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "create .git-blame-ignore-revs file ",
+    "updated_at": "2026-02-16T13:08:22Z"
+  },
+  {
+    "additions": 5,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Some of our image processors have a fast return for images that are already square. However, this fast return skips the `background_color` check, which causes flaky test failures because the `test_padding` test uses `self.assertRaises()` t\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43981",
+    "created_at": "2026-02-13T17:01:51Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43981/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43981",
+    "labels": [],
+    "merged": true,
+    "number": 43981,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix early image processor return not raising error",
+    "updated_at": "2026-02-16T16:40:41Z"
+  },
+  {
+    "additions": 3,
+    "author": "albertvillanova",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Escape `%` in `help` for `ArgumentParser.add_argument` to fix TypeError: > TypeError: not enough arguments for format string Context: https://docs.python.org/3/library/argparse.html#help > As the help string support\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43980",
+    "created_at": "2026-02-13T15:43:52Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43980/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43980",
+    "labels": [],
+    "merged": false,
+    "number": 43980,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Escape % in help for ArgumentParser.add_argument to fix TypeError",
+    "updated_at": "2026-02-17T17:30:07Z"
+  },
+  {
+    "additions": 0,
+    "author": "NicoSimo",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Minor fix, resolves some older references to Python3.9. Fixes #43976 ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the\u2026",
+    "changed_files": 0,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43978",
+    "created_at": "2026-02-13T14:40:54Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43978/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43978",
+    "labels": [],
+    "merged": false,
+    "number": 43978,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update references to Python3.9 to Python3.10. Resolves #43976",
+    "updated_at": "2026-02-13T17:00:07Z"
+  },
+  {
+    "additions": 48,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43977",
+    "created_at": "2026-02-13T13:18:49Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43977/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43977",
+    "labels": [],
+    "merged": true,
+    "number": 43977,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "stable grouped_mm API",
+    "updated_at": "2026-02-16T11:09:33Z"
+  },
+  {
+    "additions": 1659,
+    "author": "Abubakar-rashid",
+    "author_association": "NONE",
+    "body_excerpt": "This fixes issue #43957 reported by @xvdp, where models fail to load when using [torch.device('meta')](vscode-file://vscode-app/c:/Users/Priva/AppData/Local/Programs/Microsoft%20VS%20Code/_/resources/app/out/vs/code/electron-browser/workbe\u2026",
+    "changed_files": 28,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43974",
+    "created_at": "2026-02-13T11:27:42Z",
+    "deletions": 381,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43974/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43974",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43974,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: Replace torch.linspace().item() with python_linspace() to resolv\u2026",
+    "updated_at": "2026-02-16T13:46:49Z"
+  },
+  {
+    "additions": 3909,
+    "author": "MHRDYN7",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Add support for lfm 2/2.5 audio models. (closes #43909)",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43973",
+    "created_at": "2026-02-13T09:36:59Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43973/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43973",
+    "labels": [],
+    "merged": false,
+    "number": 43973,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add lfm2.5 audio",
+    "updated_at": "2026-02-21T16:42:21Z"
+  },
+  {
+    "additions": 2219,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Following Ernie, we build 3d positions based on `mm_token_type_ids` and the models will return them by default from `processor`. We have a unified `get_vision_position` in the qwen2-vl model file, all other models j\u2026",
+    "changed_files": 45,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 30,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43972",
+    "created_at": "2026-02-13T09:31:44Z",
+    "deletions": 1611,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43972/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43972",
+    "labels": [],
+    "merged": true,
+    "number": 43972,
+    "review_comments_count": 17,
+    "state": "closed",
+    "title": ":rotating_light: Unify 3D position ids",
+    "updated_at": "2026-03-05T18:48:30Z"
+  },
+  {
+    "additions": 65,
+    "author": "caffeinism",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? 1. According to the paper, this model is designed to reference 250 contexts (10 seconds), but the current implementation uses DynamicCache without employing create_sliding_window_causal_mask, causing it to reference\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43971",
+    "created_at": "2026-02-13T09:28:32Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43971/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43971",
+    "labels": [
+      "Audio"
+    ],
+    "merged": true,
+    "number": 43971,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[Mimi] Calibrate to ensure encoder streaming performs correctly",
+    "updated_at": "2026-02-23T14:20:01Z"
+  },
+  {
+    "additions": 542,
+    "author": "jackcook",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for quantization with [Four Over Six (4/6)](https://github.com/mit-han-lab/fouroversix). Our library currently focuses on quantizing linear layers to NVFP4, including weight, activation, and gra\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43970",
+    "created_at": "2026-02-13T05:15:44Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43970/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43970",
+    "labels": [],
+    "merged": true,
+    "number": 43970,
+    "review_comments_count": 28,
+    "state": "closed",
+    "title": "Add Four Over Six quantization integration",
+    "updated_at": "2026-02-25T09:30:09Z"
+  },
+  {
+    "additions": 6,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43969",
+    "created_at": "2026-02-13T03:47:24Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43969/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43969",
+    "labels": [],
+    "merged": false,
+    "number": 43969,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix for 2D weight error in embedding layer with ZeRO3",
+    "updated_at": "2026-02-20T07:32:14Z"
+  },
+  {
+    "additions": 90,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Enhances `get_json_schema()` and `render_jinja_template()` to support instance methods, class methods, and static methods, not just plain functions. Previously, `get_json_schema()` only worked with standalone functi\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43968",
+    "created_at": "2026-02-13T01:43:51Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43968/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43968",
+    "labels": [],
+    "merged": true,
+    "number": 43968,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Enhance JSON schema generation to support instance, static, and class methods",
+    "updated_at": "2026-02-13T18:01:56Z"
+  },
+  {
+    "additions": 3,
+    "author": "shtse8",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes the `AttributeError: 'List' object has no attribute 'dtype'` crash in `run_classification.py` when loading JSON data with list-type labels for multi-label classification (reported in #43116). ### Problem When\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43967",
+    "created_at": "2026-02-12T23:42:11Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43967/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43967",
+    "labels": [],
+    "merged": false,
+    "number": 43967,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix AttributeError in run_classification.py when detecting multi-label data",
+    "updated_at": "2026-02-12T23:42:11Z"
+  },
+  {
+    "additions": 10,
+    "author": "shtse8",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #43867 When a model has sub-models with different naming conventions (e.g. `model.layers.26.self_attn.o_proj.weight` vs `desc_model.roberta.encoder.layers.7.norm1.weight`), `dot_natural_key` can produce lists\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43966",
+    "created_at": "2026-02-12T23:40:45Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43966/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43966",
+    "labels": [],
+    "merged": true,
+    "number": 43966,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix TypeError in dot_natural_key when state_dict keys have mixed types at same position",
+    "updated_at": "2026-02-13T17:39:52Z"
+  },
+  {
+    "additions": 77,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "updates `tokenizer_summary.md`: - condense intro and subword tokenization sections since this doc is mostly about BPE/Unigram/WordPiece - removes some redundant and introductory motivation content and links to the course for more info - pl\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43965",
+    "created_at": "2026-02-12T22:08:33Z",
+    "deletions": 200,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43965/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43965",
+    "labels": [],
+    "merged": true,
+    "number": 43965,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[docs] tokenizer summary",
+    "updated_at": "2026-02-17T18:17:25Z"
+  },
+  {
+    "additions": 72,
+    "author": "tohtana",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR fixes a ZeRO-3 checkpoint loading failure in Transformers\u2019 conversion-mapped loading path. In affected cases, many parameters are reported as missing and are actually not restored from checkpoint (they get reinitialized). `transfor\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43964",
+    "created_at": "2026-02-12T22:01:48Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43964/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43964",
+    "labels": [],
+    "merged": false,
+    "number": 43964,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix ZeRO-3 conversion-mapped checkpoint loading",
+    "updated_at": "2026-02-13T22:58:54Z"
+  },
+  {
+    "additions": 5,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors usages of `file.readlines()` to more Pythonic equivalents (`list(file)` or direct iteration) in core tokenization and utility files. **Key Improvements:** 1. **Memory Optimization:** Replaced `list(f.readl\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43963",
+    "created_at": "2026-02-12T21:16:47Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43963/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43963",
+    "labels": [],
+    "merged": false,
+    "number": 43963,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor redundant .readlines() with list()",
+    "updated_at": "2026-02-13T12:49:22Z"
+  },
+  {
+    "additions": 59,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors `tests/cli/test_serve.py` to use native Python `assert` statements instead of legacy `unittest.TestCase` assertion methods. This modernization aligns the CLI tests with `pytest` best practices, enabling: -\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43962",
+    "created_at": "2026-02-12T18:02:43Z",
+    "deletions": 64,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43962/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43962",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43962,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Refactor CLI tests using native pytest assertions",
+    "updated_at": "2026-02-13T12:49:11Z"
+  },
+  {
+    "additions": 12,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a common Python pitfall regarding **mutable default arguments**. In Python, default arguments are evaluated only once at function definition time. If a mutable object (like a `list`) is used as a default, that\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43961",
+    "created_at": "2026-02-12T17:31:19Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43961/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43961",
+    "labels": [],
+    "merged": false,
+    "number": 43961,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Replace mutable default arguments with None",
+    "updated_at": "2026-02-13T12:45:04Z"
+  },
+  {
+    "additions": 1194,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "# Summary This PR adds the asynchronous batching feature to continuous batching (CB). Asynchronous batching, through the use of more VRAM and CUDA streams and events, greatly reduces the CPU overhead of preparing and updating batches by hi\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43960",
+    "created_at": "2026-02-12T17:20:38Z",
+    "deletions": 666,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43960/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43960",
+    "labels": [],
+    "merged": true,
+    "number": 43960,
+    "review_comments_count": 39,
+    "state": "closed",
+    "title": "[CB] [Major] Asynchronous batching",
+    "updated_at": "2026-02-23T10:11:28Z"
+  },
+  {
+    "additions": 32,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes potential `UnicodeDecodeError` on Windows (and other environments where the default encoding is not UTF-8) by enforcing `encoding=\"utf-8\"` in standard `open()` calls across the core library. ## Modifications A\u2026",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43959",
+    "created_at": "2026-02-12T17:00:15Z",
+    "deletions": 32,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43959/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43959",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43959,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Enforce explicit UTF-8 encoding in core Library to prevent Windows crashes",
+    "updated_at": "2026-02-13T12:50:43Z"
+  },
+  {
+    "additions": 2,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Update the quantization docker file to add kernels",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43958",
+    "created_at": "2026-02-12T16:31:42Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43958/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43958",
+    "labels": [],
+    "merged": false,
+    "number": 43958,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[kernels] update docker file",
+    "updated_at": "2026-03-25T10:31:24Z"
+  },
+  {
+    "additions": 8,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/43913 After scanning the collection [here](https://huggingface.co/collections/Qwen/qwen3-vl), all models already have merged experts but need a transpose",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43956",
+    "created_at": "2026-02-12T16:16:08Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43956/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43956",
+    "labels": [],
+    "merged": false,
+    "number": 43956,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix qwen3-vl-moe weight mapping",
+    "updated_at": "2026-02-12T17:29:22Z"
+  },
+  {
+    "additions": 5,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes kernel versions for new builds with torch 2.10",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43955",
+    "created_at": "2026-02-12T16:01:58Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43955/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43955",
+    "labels": [],
+    "merged": true,
+    "number": 43955,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[kernels] fix kernel versions ",
+    "updated_at": "2026-02-13T07:32:33Z"
+  },
+  {
+    "additions": 3,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The LASR model uses `layerdrop`, which we forgot to disable in the tests. Since the tests only have 2 hidden layers, and the default layerdrop chance is `0.1`, this means there's a `0.1^2 = 1%` chance in any test that doesn't call `model.e\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43954",
+    "created_at": "2026-02-12T14:45:01Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43954/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43954",
+    "labels": [],
+    "merged": true,
+    "number": 43954,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix LASR test layerdrop issue",
+    "updated_at": "2026-02-12T17:03:42Z"
+  },
+  {
+    "additions": 1,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/43931, no transpose needed after standardizing the model impl to inherit from Qwen3-MoE",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43953",
+    "created_at": "2026-02-12T14:43:59Z",
+    "deletions": 15,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43953/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43953",
+    "labels": [],
+    "merged": false,
+    "number": 43953,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix conversion mapping for Qwen3VL-MoE",
+    "updated_at": "2026-02-16T18:02:05Z"
+  },
+  {
+    "additions": 78,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/trl/issues/5088 We shouldn't use `rope_deltas` from prev `generation/forward` call if this is a new generation from scratch. This is already correctly implemented in `compute_3d_\u2026",
+    "changed_files": 14,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43952",
+    "created_at": "2026-02-12T14:34:52Z",
+    "deletions": 31,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43952/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43952",
+    "labels": [],
+    "merged": true,
+    "number": 43952,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix qwen-vl position ids when generating several times",
+    "updated_at": "2026-02-12T16:22:39Z"
+  },
+  {
+    "additions": 4,
+    "author": "lordaarush",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #43883 After #42270, `all_tied_weights_keys` is initialized in `post_init()`, but remote models loaded with `trust_remote_code=True` don't always call `post_init()` properly, causing `AttributeError` when load\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43951",
+    "created_at": "2026-02-12T14:32:00Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43951/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43951",
+    "labels": [],
+    "merged": false,
+    "number": 43951,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix AttributeError for remote models with trust_remote_code=True",
+    "updated_at": "2026-02-12T15:39:38Z"
+  },
+  {
+    "additions": 61,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a collection of spelling errors found throughout `src/transformers` in docstrings, comments, and user-facing error messages. ## Modifications Corrected the following typos across multiple files in `src/transfo\u2026",
+    "changed_files": 45,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43949",
+    "created_at": "2026-02-12T14:15:42Z",
+    "deletions": 61,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43949/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43949",
+    "labels": [],
+    "merged": true,
+    "number": 43949,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Fix typos in docstrings, comments, and error messages",
+    "updated_at": "2026-02-12T16:26:10Z"
+  },
+  {
+    "additions": 147,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes `get_num_of_image_tokens` in idefics3 and adds a test. Aloong the way fixes a few more models Reported in https://github.com/vllm-project/vllm/pull/34358",
+    "changed_files": 25,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43948",
+    "created_at": "2026-02-12T13:52:37Z",
+    "deletions": 69,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43948/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43948",
+    "labels": [],
+    "merged": true,
+    "number": 43948,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix `get_number_of_image_tokens`",
+    "updated_at": "2026-02-12T16:23:37Z"
+  },
+  {
+    "additions": 42,
+    "author": "casinca",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Hello, This is a complementary PR to #42769 (not yet pushed, at the time of writing) in order to fix #42754 Tests passed (at least the ones with a test.) <!-- Congratulations! You've made it this far! You're not qui\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43947",
+    "created_at": "2026-02-12T13:20:24Z",
+    "deletions": 21,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43947/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43947",
+    "labels": [],
+    "merged": true,
+    "number": 43947,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: Better weight decay exclusion in `run_*_no\u2011trainer.py` examples",
+    "updated_at": "2026-02-12T16:24:43Z"
+  },
+  {
+    "additions": 5,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix docker files: some issues for `[dev-torch]` and `kernels`",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43946",
+    "created_at": "2026-02-12T13:01:56Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43946/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43946",
+    "labels": [],
+    "merged": true,
+    "number": 43946,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix docker files",
+    "updated_at": "2026-02-12T13:11:21Z"
+  },
+  {
+    "additions": 25,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Some parts of the computation were not so exact. It should not change anything in general, but may improve perfs on constrained environments",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43945",
+    "created_at": "2026-02-12T12:56:19Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43945/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43945",
+    "labels": [],
+    "merged": true,
+    "number": 43945,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Improve memory allocator during loading",
+    "updated_at": "2026-02-13T11:25:07Z"
+  },
+  {
+    "additions": 4,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "We have flaky test failures in `tests/models/qwen3_omni_moe/test_modeling_qwen3_omni_moe.py::Qwen3OmniMoeThinkerForConditionalGenerationModelTest::test_generate_continue_from_past_key_values`. The cause is that the logic in this test drops\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43944",
+    "created_at": "2026-02-12T12:55:52Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43944/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43944",
+    "labels": [],
+    "merged": true,
+    "number": 43944,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix flaky test for multimodal LLMs",
+    "updated_at": "2026-02-12T13:30:17Z"
+  },
+  {
+    "additions": 6,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Replaces legacy `.format()` calls with f-strings in several model conversion scripts (`convert_*.py`). ## Modifications Used `flynt` to apply safe transformations to string literals in: - `src/transformers/models/im\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43943",
+    "created_at": "2026-02-12T11:49:23Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43943/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43943",
+    "labels": [],
+    "merged": true,
+    "number": 43943,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Modernize string formatting (f-strings) in conversion scripts",
+    "updated_at": "2026-02-12T14:20:49Z"
+  },
+  {
+    "additions": 20,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Re-add the overwritten test that was mistakenly removed in https://github.com/huggingface/transformers/pull/43916",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43942",
+    "created_at": "2026-02-12T11:20:08Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43942/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43942",
+    "labels": [],
+    "merged": true,
+    "number": 43942,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix bark test",
+    "updated_at": "2026-02-12T11:34:22Z"
+  },
+  {
+    "additions": 113,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds explicit `encoding=\"utf-8\"` to file I/O operations in several `examples/pytorch/` scripts. ## The Problem On Windows, `open()` defaults to the system encoding (often `cp1252`). This causes crashes (`UnicodeDeco\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43941",
+    "created_at": "2026-02-12T11:09:14Z",
+    "deletions": 48,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43941/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43941",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43941,
+    "review_comments_count": 22,
+    "state": "closed",
+    "title": "Fix UnicodeDecodeError in PyTorch examples on Windows",
+    "updated_at": "2026-02-13T12:50:27Z"
+  },
+  {
+    "additions": 23,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh This PR fixes failed case: `tests/models/exaone_moe/test_modeling_exaone_moe.py::ExaoneMoeIntegrationTest::test_model_logits`, pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43938",
+    "created_at": "2026-02-12T09:16:04Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43938/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43938",
+    "labels": [],
+    "merged": true,
+    "number": 43938,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix failed test case for exaone_moe model",
+    "updated_at": "2026-04-13T02:40:25Z"
+  },
+  {
+    "additions": 47,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh , pls help review, thx!",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43936",
+    "created_at": "2026-02-12T08:34:03Z",
+    "deletions": 19,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43936/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43936",
+    "labels": [],
+    "merged": true,
+    "number": 43936,
+    "review_comments_count": 13,
+    "state": "closed",
+    "title": "Fix failed unit tests for moonshine_streaming model",
+    "updated_at": "2026-04-09T02:32:47Z"
+  },
+  {
+    "additions": 1245,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds mlx quantization for mps devices leveraging the `kernels` library for pre-built kernels !!",
+    "changed_files": 13,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43934",
+    "created_at": "2026-02-12T07:59:02Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43934/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43934",
+    "labels": [],
+    "merged": true,
+    "number": 43934,
+    "review_comments_count": 20,
+    "state": "closed",
+    "title": "[Quantization] Add metal quantization for MPS devices!",
+    "updated_at": "2026-02-27T13:28:31Z"
+  },
+  {
+    "additions": 66,
+    "author": "quic-meetkuma",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds hardware backend called \"qaic\" which is for Qualcomm's AI Accelerator. The inclusion is similar to any other hardware backend in the Trainer. With this the user will be able to use Qualcomm's AI Acceler\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43933",
+    "created_at": "2026-02-12T06:14:52Z",
+    "deletions": 2,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43933/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43933",
+    "labels": [],
+    "merged": false,
+    "number": 43933,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Added support for qaic backend for Qualcomm's AI Accelerator",
+    "updated_at": "2026-02-17T16:53:38Z"
+  },
+  {
+    "additions": 3,
+    "author": "quic-meetkuma",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? It fixes minor issue in the gather_full_tensor function. The existing implementation tries to perform all_gather across all the ranks of distributed training. But when we talk about the case of TP+DP then we only ne\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43932",
+    "created_at": "2026-02-12T05:49:32Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43932/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43932",
+    "labels": [],
+    "merged": true,
+    "number": 43932,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Inclusion of process_group in the gather_full_tensor function in tensor_parallel.py",
+    "updated_at": "2026-02-13T14:29:24Z"
+  },
+  {
+    "additions": 25,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? We can use PT 2.4.0 is_autocast_enabled changes to improve code.",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43930",
+    "created_at": "2026-02-12T02:04:11Z",
+    "deletions": 20,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43930/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43930",
+    "labels": [],
+    "merged": true,
+    "number": 43930,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Improve use of torch.is_autocast_enabled",
+    "updated_at": "2026-02-17T00:42:39Z"
+  },
+  {
+    "additions": 174,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "the second part to #43900. this pr focuses on customizing tokenizers: - training your own - passing a custom vocabulary to `vocab` and `merges` - subclassing `TokenizersBackend`",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43929",
+    "created_at": "2026-02-11T23:20:18Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43929/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43929",
+    "labels": [],
+    "merged": true,
+    "number": 43929,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[docs] customizing tokenizers",
+    "updated_at": "2026-02-17T17:15:32Z"
+  },
+  {
+    "additions": 48,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing Dia use case was identified and fixed in this PR: \u2192 Tests that created `DiaConfig` with custom token IDs (`eos_token_id=97` for a `vocab_size=100`) failed because saving then reloading the co\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43928",
+    "created_at": "2026-02-11T19:59:42Z",
+    "deletions": 15,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43928/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43928",
+    "labels": [],
+    "merged": true,
+    "number": 43928,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "fix(models): Preserve custom token IDs through DiaConfig save and load",
+    "updated_at": "2026-02-13T10:56:31Z"
+  },
+  {
+    "additions": 24,
+    "author": "kashif",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43926",
+    "created_at": "2026-02-11T19:45:46Z",
+    "deletions": 105,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43926/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43926",
+    "labels": [],
+    "merged": true,
+    "number": 43926,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Deespeed] fix WeightConverter.convert() use",
+    "updated_at": "2026-02-16T14:07:30Z"
+  },
+  {
+    "additions": 5,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds explicit `encoding=\"utf-8\"` to `open()` calls in `.circleci/create_circleci_config.py` and `.circleci/parse_test_outputs.py`. ## The Problem On Windows, `open()` defaults to the system encoding (often `cp1252`)\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43925",
+    "created_at": "2026-02-11T19:18:51Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43925/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43925",
+    "labels": [],
+    "merged": true,
+    "number": 43925,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add explicit utf-8 encoding to CircleCI scripts for Windows compatibility",
+    "updated_at": "2026-02-12T13:59:02Z"
+  },
+  {
+    "additions": 576,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "WIP",
+    "changed_files": 48,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43924",
+    "created_at": "2026-02-11T17:35:45Z",
+    "deletions": 726,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43924/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43924",
+    "labels": [],
+    "merged": false,
+    "number": 43924,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[`Attn`] More old mask APIs",
+    "updated_at": "2026-02-11T22:34:32Z"
+  },
+  {
+    "additions": 0,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The `no_grad` decorators created problems, especially when new tensors are created! Reverting that PR until we can narrow down exactly which areas are problematic.",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43923",
+    "created_at": "2026-02-11T16:39:30Z",
+    "deletions": 17,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43923/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43923",
+    "labels": [],
+    "merged": true,
+    "number": 43923,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Revert #43897",
+    "updated_at": "2026-02-11T17:21:07Z"
+  },
+  {
+    "additions": 2,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a hard crash (`AttributeError`) in `src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py` caused by `collections.MutableMapping`, which was removed in Python 3.10. ## The Problem The script imp\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43922",
+    "created_at": "2026-02-11T16:22:54Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43922/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43922",
+    "labels": [],
+    "merged": true,
+    "number": 43922,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix AttributeError in OwlViT conversion script for Python 3.10+",
+    "updated_at": "2026-02-12T08:33:47Z"
+  },
+  {
+    "additions": 3,
+    "author": "svlandeg",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Hi \ud83d\udc4b Typer maintainer here. # What does this PR do? We used to offer `typer-slim` as a more light-weight package, which didn't depend on `rich` and `shellingham`. Unfortuntately, the way it was set up meant that we ran into issues with pac\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43921",
+    "created_at": "2026-02-11T15:54:04Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43921/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43921",
+    "labels": [],
+    "merged": true,
+    "number": 43921,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Depend on `typer` instead of `typer-slim`",
+    "updated_at": "2026-02-16T19:02:58Z"
+  },
+  {
+    "additions": 1,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. This is a core file, and we cannot allow to change it without triggering everything - see https://github.com/huggingface/transformers/pull/43897#issuecomment-3885203477 as well",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43920",
+    "created_at": "2026-02-11T15:42:20Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43920/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43920",
+    "labels": [],
+    "merged": true,
+    "number": 43920,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Changes to cache_utils should trigger all tests all the time",
+    "updated_at": "2026-02-11T15:51:31Z"
+  },
+  {
+    "additions": 19,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR enables to use `sync_each_batch` argument when passing `gradient_accumulation_kwargs` in `AcceleratorConfig`. I'm also removing `adjust_scheduler` docstring as it is not used/enabled for now as we don't prep\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43919",
+    "created_at": "2026-02-11T15:37:05Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43919/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43919",
+    "labels": [],
+    "merged": true,
+    "number": 43919,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix sync gradient",
+    "updated_at": "2026-02-13T14:38:09Z"
+  },
+  {
+    "additions": 2,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43918",
+    "created_at": "2026-02-11T15:26:19Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43918/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43918",
+    "labels": [],
+    "merged": false,
+    "number": 43918,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "foo",
+    "updated_at": "2026-02-11T15:46:51Z"
+  },
+  {
+    "additions": 1157,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Integrates a library wide monkey patching API to fix #43284 and allow things like restructuring the expert weights and fusing the qkv projections. A concrete and complete example: <details> ```py from typing import\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43917",
+    "created_at": "2026-02-11T15:08:30Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43917/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43917",
+    "labels": [],
+    "merged": true,
+    "number": 43917,
+    "review_comments_count": 38,
+    "state": "closed",
+    "title": "Model patching API",
+    "updated_at": "2026-03-02T10:10:50Z"
+  },
+  {
+    "additions": 616,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. See discussion in https://github.com/huggingface/transformers/issues/43885",
+    "changed_files": 296,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43916",
+    "created_at": "2026-02-11T14:36:44Z",
+    "deletions": 721,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43916/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43916",
+    "labels": [],
+    "merged": true,
+    "number": 43916,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Harmonize `input_embeds` to `inputs_embeds` everywhere",
+    "updated_at": "2026-02-11T16:03:56Z"
+  },
+  {
+    "additions": 272,
+    "author": "merveenoyan",
+    "author_association": "MEMBER",
+    "body_excerpt": "notebook to converted models e2e inference: https://colab.research.google.com/drive/1g-Vc-Zvjy_STNEUbWJhYDTpFyT7o6TGl?usp=sharing models: https://huggingface.co/merve/PaddleOCR-VL-hf https://huggingface.co/merve/PaddleOCR-VL-1.5-hf @molbap",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43915",
+    "created_at": "2026-02-11T14:30:59Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43915/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43915",
+    "labels": [],
+    "merged": false,
+    "number": 43915,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "add PaddleOCR-VL conversion",
+    "updated_at": "2026-02-11T14:40:51Z"
+  },
+  {
+    "additions": 2409,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR reorder all methods from Trainer. No modifications was done to the methods itself. With this final PR, all the major changes should be done and we can focus on simplifying the core logic + testing. Reorderin\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43914",
+    "created_at": "2026-02-11T13:36:39Z",
+    "deletions": 2323,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43914/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43914",
+    "labels": [],
+    "merged": true,
+    "number": 43914,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Reorder Trainer methods",
+    "updated_at": "2026-02-13T17:07:59Z"
+  },
+  {
+    "additions": 38,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/43299#issuecomment-3849688073",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43913",
+    "created_at": "2026-02-11T12:44:05Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43913/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43913",
+    "labels": [],
+    "merged": false,
+    "number": 43913,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add a sentinel mechanism in Transpose and fix qwen3_vl_moe weight mapping",
+    "updated_at": "2026-02-16T15:53:09Z"
+  },
+  {
+    "additions": 790,
+    "author": "JaredforReal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? - fix k_norm as layernorm - add index_head_dim to config - rewrite GlmMoeDsaConfig from PreTrainedConfig - rewrite indexer as an nn.Module class - fix mlp layers mismatch - implement Attention.forward() # Current st\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43912",
+    "created_at": "2026-02-11T12:37:41Z",
+    "deletions": 348,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43912/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43912",
+    "labels": [],
+    "merged": true,
+    "number": 43912,
+    "review_comments_count": 18,
+    "state": "closed",
+    "title": "[fix][wip] GlmMoeDsa: try implement DSA",
+    "updated_at": "2026-03-04T16:30:36Z"
+  },
+  {
+    "additions": 1,
+    "author": "pavel-esir",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Without this like `AutoTokenizer.from_pretrained(...)` does not create `LlamaTokenizer` object. ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the ca\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43911",
+    "created_at": "2026-02-11T10:53:54Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43911/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43911",
+    "labels": [],
+    "merged": false,
+    "number": 43911,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "add Llama to mapping names in tokenization_auto.py",
+    "updated_at": "2026-02-18T10:58:59Z"
+  },
+  {
+    "additions": 17,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh , pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43910",
+    "created_at": "2026-02-11T09:43:08Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43910/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43910",
+    "labels": [],
+    "merged": true,
+    "number": 43910,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Update expected output for Jais2 model tests",
+    "updated_at": "2026-04-09T02:32:44Z"
+  },
+  {
+    "additions": 13,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh , pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43907",
+    "created_at": "2026-02-11T08:30:31Z",
+    "deletions": 36,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43907/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43907",
+    "labels": [],
+    "merged": true,
+    "number": 43907,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "update glm image model expected out for tests",
+    "updated_at": "2026-04-09T02:32:46Z"
+  },
+  {
+    "additions": 31,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Per the title. Enable the corresponding tests and re-enable the tests that were skipped before.",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43905",
+    "created_at": "2026-02-11T05:31:42Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43905/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43905",
+    "labels": [],
+    "merged": true,
+    "number": 43905,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "XPU now supports backward for the FA2 fixed path",
+    "updated_at": "2026-02-26T10:48:28Z"
+  },
+  {
+    "additions": 3,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? removes two unused dependencies.",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43904",
+    "created_at": "2026-02-11T02:48:18Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43904/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43904",
+    "labels": [],
+    "merged": true,
+    "number": 43904,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Remove unused dependencies",
+    "updated_at": "2026-02-18T01:04:31Z"
+  },
+  {
+    "additions": 0,
+    "author": "math-hiyoko",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43903",
+    "created_at": "2026-02-11T01:44:06Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43903/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43903",
+    "labels": [],
+    "merged": true,
+    "number": 43903,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: TextClassificationPipeline docs mentioning deprecated return_all_scores",
+    "updated_at": "2026-02-11T17:13:26Z"
+  },
+  {
+    "additions": 23,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR fixes and removes more outdated documentation contents <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the t\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43902",
+    "created_at": "2026-02-11T01:39:07Z",
+    "deletions": 164,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43902/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43902",
+    "labels": [],
+    "merged": true,
+    "number": 43902,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix old tech stack in doc",
+    "updated_at": "2026-02-11T13:23:56Z"
+  },
+  {
+    "additions": 165,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "refactors tokenizer docs for v5: - describes new unified tokenization backend architecture and how it works - empty tokenizer initialization with model-specific tokenizer - update structure to mirror developer workflow (load -> encode/deco\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43900",
+    "created_at": "2026-02-11T00:42:19Z",
+    "deletions": 314,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43900/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43900",
+    "labels": [],
+    "merged": true,
+    "number": 43900,
+    "review_comments_count": 9,
+    "state": "closed",
+    "title": "[docs] refactor tokenizer docs",
+    "updated_at": "2026-02-17T17:50:01Z"
+  },
+  {
+    "additions": 5,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43898",
+    "created_at": "2026-02-10T17:37:34Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43898/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43898",
+    "labels": [],
+    "merged": true,
+    "number": 43898,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "adding BC for custom toks accessing slow tok attrs deprecated in v5",
+    "updated_at": "2026-02-10T21:21:33Z"
+  },
+  {
+    "additions": 17,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Although our cache update methods are usually used in inference, when grad is disabled anyway, there seem to be some edge cases where they cause problems with compilation and gradient computation. Since we never want to propagate gradient\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43897",
+    "created_at": "2026-02-10T17:15:15Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43897/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43897",
+    "labels": [],
+    "merged": true,
+    "number": 43897,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Decorate cache updates with no_grad, just in case",
+    "updated_at": "2026-02-11T15:31:40Z"
+  },
+  {
+    "additions": 427,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As discussed in https://github.com/huggingface/transformers/pull/43820#pullrequestreview-3780031785, expected outputs of DAC need to be recomputed Reasoning: outputs were changed with https://github.com/huggingface/\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43896",
+    "created_at": "2026-02-10T16:49:32Z",
+    "deletions": 304,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43896/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43896",
+    "labels": [],
+    "merged": true,
+    "number": 43896,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix expected DAC outputs due to (old) change in CI settings.",
+    "updated_at": "2026-02-10T17:47:59Z"
+  },
+  {
+    "additions": 8,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR reverts `place_model_on_device` back to a property. I prefer not introducing new args for now.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43895",
+    "created_at": "2026-02-10T15:06:36Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43895/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43895",
+    "labels": [],
+    "merged": true,
+    "number": 43895,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "revert place_model_on_device to property",
+    "updated_at": "2026-02-11T10:37:06Z"
+  },
+  {
+    "additions": 165,
+    "author": "JaredforReal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? - fix k_norm as layernorm - add index_head_dim to config - rewrite GlmMoeDsaConfig from PreTrainedConfig - rewrite indexer as an nn.Module - fix mlp layers mismatch <!-- Congratulations! You've made it this far! You\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43894",
+    "created_at": "2026-02-10T14:56:42Z",
+    "deletions": 124,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43894/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43894",
+    "labels": [],
+    "merged": false,
+    "number": 43894,
+    "review_comments_count": 8,
+    "state": "closed",
+    "title": "[fix] GlmMoeDsa model",
+    "updated_at": "2026-03-04T16:30:34Z"
+  },
+  {
+    "additions": 55,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes #43854. Long term we should probably have it in the post-init to share them @Cyrilvallez I am lazy today",
+    "changed_files": 20,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43893",
+    "created_at": "2026-02-10T14:16:10Z",
+    "deletions": 47,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43893/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43893",
+    "labels": [],
+    "merged": true,
+    "number": 43893,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Move `_keys_to_ignore_on_load_missing` for now",
+    "updated_at": "2026-02-11T13:47:43Z"
+  },
+  {
+    "additions": 1,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43892",
+    "created_at": "2026-02-10T13:07:29Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43892/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43892",
+    "labels": [],
+    "merged": true,
+    "number": 43892,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update hub metadata",
+    "updated_at": "2026-02-10T15:53:51Z"
+  },
+  {
+    "additions": 36,
+    "author": "Mercury0226",
+    "author_association": "NONE",
+    "body_excerpt": "Closed PR. Test-only proposal for #7715.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43891",
+    "created_at": "2026-02-10T12:04:08Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43891/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43891",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43891,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "tests: extend RAG retriever smoke coverage (#7715)",
+    "updated_at": "2026-02-10T13:26:43Z"
+  },
+  {
+    "additions": 54,
+    "author": "Mercury0226",
+    "author_association": "NONE",
+    "body_excerpt": "Closed PR. Test-only proposal for #6045.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43890",
+    "created_at": "2026-02-10T11:48:59Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43890/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43890",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43890,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "tests: add BART memory regression guard test (#6045)",
+    "updated_at": "2026-02-10T13:26:45Z"
+  },
+  {
+    "additions": 24,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43889",
+    "created_at": "2026-02-10T11:25:01Z",
+    "deletions": 18,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43889/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43889",
+    "labels": [],
+    "merged": true,
+    "number": 43889,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`Jamba`] Fallback to slow path and warn instead of error out",
+    "updated_at": "2026-02-10T12:35:49Z"
+  },
+  {
+    "additions": 2075,
+    "author": "bhargav-patel-29",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for **Param-2-17B-MoE-A2.4B**, a large-scale Mixture-of-Experts (MoE) causal language model. Param-2-17B-MoE-A2.4B uses a **Hybrid Dense + MoE architecture** with 17B total parameters while acti\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43888",
+    "created_at": "2026-02-10T11:02:13Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43888/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43888",
+    "labels": [],
+    "merged": false,
+    "number": 43888,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Support for BharatGen's Param2MoE model architecture",
+    "updated_at": "2026-04-16T09:31:16Z"
+  },
+  {
+    "additions": 12,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? MistralCommonBackend does not implement `added_tokens_decoder` so any call to PreTrainedTokenizerBase's `__repr__ `fails.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43887",
+    "created_at": "2026-02-10T10:55:08Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43887/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43887",
+    "labels": [],
+    "merged": true,
+    "number": 43887,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[MistralCommonBackend] fix loading proc",
+    "updated_at": "2026-02-10T12:32:24Z"
+  },
+  {
+    "additions": 20,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/43878 After the refactor we started saving `out_features` and `stage_names` in timm backbone config, because it now also inherits from `BackboneConfigMixin`. But the modeling code wo\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43886",
+    "created_at": "2026-02-10T10:10:09Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43886/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43886",
+    "labels": [],
+    "merged": true,
+    "number": 43886,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Timm backbone saves and loads `out_features`",
+    "updated_at": "2026-02-12T15:57:04Z"
+  },
+  {
+    "additions": 2,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43884",
+    "created_at": "2026-02-10T09:55:40Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43884/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43884",
+    "labels": [],
+    "merged": false,
+    "number": 43884,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "don't merge check workflow",
+    "updated_at": "2026-02-13T02:03:43Z"
+  },
+  {
+    "additions": 3,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Resolves https://github.com/huggingface/transformers/pull/42564#issuecomment-3874606093 #42564 updated `get_image_features` for Llama4, but it erroneously started using `pooler_output` instead of the previous `last_\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43882",
+    "created_at": "2026-02-10T08:38:51Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43882/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43882",
+    "labels": [],
+    "merged": true,
+    "number": 43882,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[`fix`] Use `last_hidden_state` key from `get_image_features` for llama4",
+    "updated_at": "2026-02-10T15:05:50Z"
+  },
+  {
+    "additions": 14,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "After the \"Refactor weight loading\" commit (#41580), UMT5EncoderModel fails to properly tie encoder.embed_tokens.weight to shared.weight when loading checkpoints with tie_word_embeddings=False (e.g., Wan-AI video generation models). This c\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43880",
+    "created_at": "2026-02-10T07:26:07Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43880/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43880",
+    "labels": [],
+    "merged": true,
+    "number": 43880,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix UMT5EncoderModel embedding weights not being tied after loading",
+    "updated_at": "2026-04-20T02:29:37Z"
+  },
+  {
+    "additions": 17,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43879",
+    "created_at": "2026-02-10T06:42:49Z",
+    "deletions": 17,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43879/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43879",
+    "labels": [],
+    "merged": true,
+    "number": 43879,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix old tech stack in doc",
+    "updated_at": "2026-02-10T13:58:11Z"
+  },
+  {
+    "additions": 39,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #43874 by adding `get_number_of_image_patches` to `Glm46VImageProcessorFast`. `Glm46VProcessor._get_num_multimodal_tokens` calls this method on `self.image_processor`, which raises an `AttributeError` when the fast image p\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43877",
+    "created_at": "2026-02-10T04:50:19Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43877/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43877",
+    "labels": [],
+    "merged": true,
+    "number": 43877,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Fix missing fast image patch counter in Glm46V",
+    "updated_at": "2026-02-11T12:23:27Z"
+  },
+  {
+    "additions": 17,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR fixes #43864 by preserving the `GlmMoeDsaConfig` default `mlp_layer_types` from the modular source. `GlmMoeDsaConfig` should default to dense MLP for the first 3 layers and sparse afterward. During modular conversion, th\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43876",
+    "created_at": "2026-02-10T04:28:23Z",
+    "deletions": 95,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43876/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43876",
+    "labels": [],
+    "merged": true,
+    "number": 43876,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix GlmMoeDsaConfig default mlp_layer_types in modular conversion",
+    "updated_at": "2026-02-10T12:24:19Z"
+  },
+  {
+    "additions": 31,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Its reset should clear ```_quantized_keys``` and ```_quantized_values ```.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43875",
+    "created_at": "2026-02-10T02:56:22Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43875/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43875",
+    "labels": [],
+    "merged": false,
+    "number": 43875,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Improve handling of QuantizedLayer.reset",
+    "updated_at": "2026-02-11T00:46:54Z"
+  },
+  {
+    "additions": 6,
+    "author": "Tanmaygangurde20",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes #43867 This PR fixes a `TypeError` in the `dot_natural_key` function in `src/transformers/core_model_loading.py` that occurs when sorting model state dictionary keys with mixed numeric and string structures. #\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43871",
+    "created_at": "2026-02-09T20:19:43Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43871/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43871",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43871,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix TypeError in dot_natural_key when sorting mixed structure keys",
+    "updated_at": "2026-02-10T12:45:05Z"
+  },
+  {
+    "additions": 90,
+    "author": "daniel7an",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Adds `interpolate_pos_encoding` support to the **VitPoseBackbone** model, enabling pretrained checkpoints to be used on input images of different resolutions. This follows the same pattern established in other visi\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43870",
+    "created_at": "2026-02-09T19:20:34Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43870/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43870",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43870,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add interpolate_pos_encoding to VitPoseBackbone",
+    "updated_at": "2026-02-10T12:48:09Z"
+  },
+  {
+    "additions": 8,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "There were some leftover references to the TranslationPipeline, so this PR removes them. Includes the cleanup from #43826 as well as some other changes!",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43869",
+    "created_at": "2026-02-09T18:35:27Z",
+    "deletions": 99,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43869/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43869",
+    "labels": [],
+    "merged": true,
+    "number": 43869,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Remove remaining vestiges of the TranslationPipeline",
+    "updated_at": "2026-02-26T14:38:05Z"
+  },
+  {
+    "additions": 1,
+    "author": "thecaptain789",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes a simple typo in the timm backbones documentation. 'neccessary' \u2192 'necessary'",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43868",
+    "created_at": "2026-02-09T17:58:02Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43868/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43868",
+    "labels": [],
+    "merged": true,
+    "number": 43868,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: correct typo 'neccessary' to 'necessary'",
+    "updated_at": "2026-02-09T18:18:58Z"
+  },
+  {
+    "additions": 4,
+    "author": "Deep-unlearning",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43865",
+    "created_at": "2026-02-09T15:12:33Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43865/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43865",
+    "labels": [],
+    "merged": true,
+    "number": 43865,
+    "review_comments_count": 17,
+    "state": "closed",
+    "title": "update somes audio models",
+    "updated_at": "2026-02-13T10:25:47Z"
+  },
+  {
+    "additions": 5,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This allows to do something like: ```python from transformers import AutoProcessor from datasets import Audio, load_dataset model_id = \"openai/whisper-large-v3\" processor = AutoProcessor.from_pretrained(model_id) da\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43863",
+    "created_at": "2026-02-09T14:18:04Z",
+    "deletions": 16,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43863/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43863",
+    "labels": [],
+    "merged": false,
+    "number": 43863,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[whisper] allow to pass text/audio specific kwargs",
+    "updated_at": "2026-02-10T11:28:03Z"
+  },
+  {
+    "additions": 1642,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. `check_model_inputs` is becoming very complex and doing more than what it should. Let's separate into 2 clear decorators: - `capture_outputs`: everything related to capture outputs - `merge_with_co\u2026",
+    "changed_files": 261,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43862",
+    "created_at": "2026-02-09T13:37:05Z",
+    "deletions": 996,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43862/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43862",
+    "labels": [],
+    "merged": true,
+    "number": 43862,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Separate `check_model_inputs` into `capture_outputs` and `merge_with_config_defaults` + ensure correctness",
+    "updated_at": "2026-02-10T17:37:46Z"
+  },
+  {
+    "additions": 2,
+    "author": "thecaptain789",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #43813 Corrects the typo in `src/transformers/integrations/peft.py` on lines 264 and 303 where `quantizatin_operations` should be `quantization_operations`.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43861",
+    "created_at": "2026-02-09T11:50:51Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43861/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43861",
+    "labels": [],
+    "merged": false,
+    "number": 43861,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: correct typo 'quantizatin_operations' to 'quantization_operations'",
+    "updated_at": "2026-02-25T13:25:05Z"
+  },
+  {
+    "additions": 1,
+    "author": "pavel-esir",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? When tokenizer is red from `tokenizer.model` then model type whether it's `Unigram` or `BPE` is done incorrectly. Correct Unigram model type is 1 not 2 according to `SentencePiece` src https://github.com/google/sent\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43860",
+    "created_at": "2026-02-09T11:45:22Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43860/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43860",
+    "labels": [],
+    "merged": true,
+    "number": 43860,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update ModelType for Unigram tokenizer",
+    "updated_at": "2026-02-09T15:13:45Z"
+  },
+  {
+    "additions": 1719,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title!",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43858",
+    "created_at": "2026-02-09T10:28:19Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43858/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43858",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43858,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add GlmMoeDsa",
+    "updated_at": "2026-02-21T10:38:46Z"
+  },
+  {
+    "additions": 348,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Should just be a way to skip remote code if we want to",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43857",
+    "created_at": "2026-02-09T10:13:39Z",
+    "deletions": 11,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43857/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43857",
+    "labels": [],
+    "merged": false,
+    "number": 43857,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Allow to bypass remote code if we want to try and convert it",
+    "updated_at": "2026-04-10T14:15:52Z"
+  },
+  {
+    "additions": 0,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "When all routing weights are zero, in this line [out_per_sample = out_per_sample * sample_weights.unsqueeze(-1)](https://github.com/huggingface/transformers/blob/main/src/transformers/integrations/moe.py#L153) , it will also output all zer\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43855",
+    "created_at": "2026-02-09T08:40:45Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43855/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43855",
+    "labels": [],
+    "merged": true,
+    "number": 43855,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "delete unnecessary code to make moe compatible to full graph compile",
+    "updated_at": "2026-04-09T02:34:08Z"
+  },
+  {
+    "additions": 1,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "- quantization: @SunMarc @MekkCyber ``` [rank0]: During handling of the above exception, another exception occurred: [rank0]: Traceback (most recent call last): [rank0]: File \"/mnt/disk3/wangyi/transformers/test_tp.py\", line 24, in <module\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43853",
+    "created_at": "2026-02-09T06:39:17Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43853/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43853",
+    "labels": [],
+    "merged": true,
+    "number": 43853,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix gptoss crash in tp",
+    "updated_at": "2026-02-09T10:42:33Z"
+  },
+  {
+    "additions": 41,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Currently, if slack reporting (re-usable) workflow or `check new failure` workflow fails, the CI triggered via a comment in a pull request will send a comment back to the PR page with \u2705 No failing test specific to t\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43852",
+    "created_at": "2026-02-09T05:43:44Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43852/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43852",
+    "labels": [],
+    "merged": true,
+    "number": 43852,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "don't merge check",
+    "updated_at": "2026-02-10T09:45:42Z"
+  },
+  {
+    "additions": 1,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43851",
+    "created_at": "2026-02-09T03:29:35Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43851/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43851",
+    "labels": [],
+    "merged": true,
+    "number": 43851,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix slack-report workflow file",
+    "updated_at": "2026-02-09T05:41:32Z"
+  },
+  {
+    "additions": 2,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - remove `pipeline()`-based inference examples from the summarization and translation task docs - keep only direct `AutoTokenizer` + `AutoModelForSeq2SeqLM.generate` examples, which match v5 behavior ## Validation - `grep -R --l\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43850",
+    "created_at": "2026-02-09T02:34:44Z",
+    "deletions": 25,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43850/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43850",
+    "labels": [],
+    "merged": false,
+    "number": 43850,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Remove stale pipeline inference examples from v5 task docs",
+    "updated_at": "2026-02-09T12:24:03Z"
+  },
+  {
+    "additions": 42,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - only treat `translation_XX_to_YY` as a special task when the `translation` pipeline is actually registered - stop advertising `translation_XX_to_YY` in unknown-task error messages when translation is not supported - add regres\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43849",
+    "created_at": "2026-02-09T02:31:22Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43849/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43849",
+    "labels": [],
+    "merged": false,
+    "number": 43849,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix translation task validation when translation pipeline is unavailable",
+    "updated_at": "2026-03-03T09:42:38Z"
+  },
+  {
+    "additions": 70,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #43698 by adding SwanLab resume init argument support in the Trainer integration. ## What changed - `SwanLabCallback.setup()` now forwards two optional env-based init args to `swanlab.init(...)`: - `SWANLAB_RUN_ID` -> `id`\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43848",
+    "created_at": "2026-02-09T02:10:15Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43848/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43848",
+    "labels": [],
+    "merged": true,
+    "number": 43848,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix SwanLab callback to forward resume init args",
+    "updated_at": "2026-02-10T12:57:08Z"
+  },
+  {
+    "additions": 39,
+    "author": "tohtana",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? When using `HfDeepSpeedConfig` + ZeRO-3 with `from_config()`, the model's custom weight initialization (`_init_weights`) is silently skipped because parameters are already partitioned by `deepspeed.zero.Init`. The m\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43847",
+    "created_at": "2026-02-08T22:04:44Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43847/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43847",
+    "labels": [],
+    "merged": false,
+    "number": 43847,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": " Fix _from_config silently skipping weight initialization under DeepSpeed ZeRO-3",
+    "updated_at": "2026-02-27T18:50:02Z"
+  },
+  {
+    "additions": 108,
+    "author": "vasanthrpjan1-boop",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Add support for logging training metrics to the Pluto experiment tracker (https://pluto.trainy.ai/) as a native Trainer callback. This provides an alternative for users migrating from Neptune given its upcoming shutdown. Closes #43724 # Wh\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43843",
+    "created_at": "2026-02-08T14:29:51Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43843/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43843",
+    "labels": [],
+    "merged": false,
+    "number": 43843,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add PlutoCallback integration for Pluto experiment tracker",
+    "updated_at": "2026-02-08T14:30:36Z"
+  },
+  {
+    "additions": 3,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a `NameError: name 'TypeAdapter' is not defined` error when importing transformers without pydantic installed. ## Problem The `TypeAdapter` class from pydantic was used as a type annotation in `_validate_requ\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43656-4",
+    "cluster_ids": [
+      "cluster-43656-4"
+    ],
+    "cluster_role": "member",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43842",
+    "created_at": "2026-02-08T13:01:19Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43842/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43842",
+    "labels": [],
+    "merged": false,
+    "number": 43842,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix(cli): Fix TypeAdapter NameError when pydantic is not installed",
+    "updated_at": "2026-02-08T16:08:04Z"
+  },
+  {
+    "additions": 7,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a bug where `SwitchTransformersConfig` incorrectly creates sparse layers when `num_sparse_encoder_layers=0` or `num_sparse_decoder_layers=0` is set with a single-layer model. ## Problem When `num_sparse_encod\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43841",
+    "created_at": "2026-02-08T12:58:27Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43841/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43841",
+    "labels": [],
+    "merged": false,
+    "number": 43841,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(switch_transformers): Fix sparse layer creation when num_sparse_*_layers=0",
+    "updated_at": "2026-02-09T13:00:01Z"
+  },
+  {
+    "additions": 2,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes incorrect timestamp calculation in Qwen3VL Processor by using `temporal_patch_size` instead of `merge_size`. ## Problem The `_calculate_timestamps()` method was being called with `self.video_processor.merge_s\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43840",
+    "created_at": "2026-02-08T12:55:28Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43840/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43840",
+    "labels": [],
+    "merged": false,
+    "number": 43840,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(qwen3_vl): Use temporal_patch_size for timestamp calculation",
+    "updated_at": "2026-02-09T08:56:08Z"
+  },
+  {
+    "additions": 9,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a `RuntimeError: expected mat1 and mat2 to have the same dtype` error when using `torch.autocast` with MoE models like `microsoft/Phi-tiny-MoE-instruct`. ## Problem `torch._grouped_mm` is not autocast-enabled\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43839",
+    "created_at": "2026-02-08T12:21:19Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43839/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43839",
+    "labels": [],
+    "merged": true,
+    "number": 43839,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(moe): Handle dtype mismatch in torch._grouped_mm with autocast",
+    "updated_at": "2026-02-11T14:58:48Z"
+  },
+  {
+    "additions": 2267,
+    "author": "mbtariq82",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds Qwen3-ASR to the Transformers library. Fixes #43837 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [co\u2026",
+    "changed_files": 21,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43838",
+    "created_at": "2026-02-08T12:05:43Z",
+    "deletions": 58,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43838/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43838",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 43838,
+    "review_comments_count": 30,
+    "state": "open",
+    "title": "Proposal to add Qwen3-ASR support [WIP]",
+    "updated_at": "2026-04-16T15:39:33Z"
+  },
+  {
+    "additions": 79,
+    "author": "pragnyanramtha",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #43824 what i think happened in #43824 is that waltwalt36 did not install the optional dependencies like pydantic, causing this issue. According to the core architecture docs, transformers implements a lazy loading mechanism for impo\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43656-4",
+    "cluster_ids": [
+      "cluster-43656-4"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43836",
+    "created_at": "2026-02-08T11:28:31Z",
+    "deletions": 70,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43836/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43836",
+    "labels": [],
+    "merged": false,
+    "number": 43836,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "fix: wrapped TypeAdpater in string literals (for now)",
+    "updated_at": "2026-02-17T04:46:27Z"
+  },
+  {
+    "additions": 5,
+    "author": "nulone",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #43828 ## What does this PR do? `torch._grouped_mm` is not registered for autocast. Under `torch.autocast`, LayerNorm outputs float32 while model weights stay bfloat16, causing RuntimeError: \"expected mat1 and mat2 to have same dtype\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43833",
+    "created_at": "2026-02-08T07:26:06Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43833/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43833",
+    "labels": [],
+    "merged": false,
+    "number": 43833,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix: ensure dtype consistency in grouped_mm under autocast",
+    "updated_at": "2026-02-11T02:28:43Z"
+  },
+  {
+    "additions": 0,
+    "author": "nulone",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #43827 ## What does this PR do? Removes deprecated `pipeline()` examples from summarization.md and translation.md that reference pre-v5 API. The manual `model.generate()` approach is preserved. ## Before submitting - [x] This PR fixe\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43832",
+    "created_at": "2026-02-08T07:06:47Z",
+    "deletions": 27,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43832/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43832",
+    "labels": [],
+    "merged": false,
+    "number": 43832,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: remove deprecated pipeline examples from summarization and tran\u2026",
+    "updated_at": "2026-02-08T07:19:52Z"
+  },
+  {
+    "additions": 0,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - Removes `pipeline()`-based inference examples from summarization and translation task documentation - These examples no longer work in v5 since `SummarizationPipeline` and `TranslationPipeline` were removed ## Background Accor\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43831",
+    "created_at": "2026-02-08T06:39:23Z",
+    "deletions": 27,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43831/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43831",
+    "labels": [],
+    "merged": true,
+    "number": 43831,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[docs] Remove pipeline() examples from summarization/translation tasks",
+    "updated_at": "2026-02-09T12:33:04Z"
+  },
+  {
+    "additions": 7792,
+    "author": "bozheng-hit",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR adds the support of codes for the upcoming Qwen3.5 series models. For information about Qwen, please visit: \ud83d\udc49https://qwen.ai Special thanks to @JJJYmmm for helping complete the code in this PR. We also appreciate the valuable feedb\u2026",
+    "changed_files": 28,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43830",
+    "created_at": "2026-02-08T05:51:57Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43830/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43830",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43830,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Adding Support for Qwen3.5",
+    "updated_at": "2026-03-03T02:26:31Z"
+  },
+  {
+    "additions": 30,
+    "author": "jayzuccarelli",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43805 Follow-up to #43794: add a pytest fixture that sets a fixed seed (42) before each test so we always get the same RNG state in model tests and improve determinism. - **`tests/conftest.py`** (new): `set_seed` fixture with `autou\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43829",
+    "created_at": "2026-02-08T05:10:32Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43829/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43829",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43829,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "chore(tests): add set_seed pytest fixture for determinism",
+    "updated_at": "2026-02-10T01:55:12Z"
+  },
+  {
+    "additions": 2,
+    "author": "math-hiyoko",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43826",
+    "created_at": "2026-02-08T01:49:06Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43826/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43826",
+    "labels": [],
+    "merged": false,
+    "number": 43826,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix: error message of pipeline",
+    "updated_at": "2026-02-09T13:26:30Z"
+  },
+  {
+    "additions": 2045,
+    "author": "redpanda1995",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43823",
+    "created_at": "2026-02-07T20:24:42Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43823/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43823",
+    "labels": [],
+    "merged": false,
+    "number": 43823,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add `facebook/MobileLLM-125M`",
+    "updated_at": "2026-02-09T11:48:23Z"
+  },
+  {
+    "additions": 15,
+    "author": "veeceey",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #43784 When PyTorch < 2.4 is installed, transformers v5.x disables PyTorch by making `is_torch_available()` return `False`. This causes the conditional import of `torch.nn as nn` (line 42) to be skipped. Howe\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43822",
+    "created_at": "2026-02-07T19:20:43Z",
+    "deletions": 13,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43822/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43822",
+    "labels": [],
+    "merged": false,
+    "number": 43822,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix NameError: name 'nn' is not defined when PyTorch < 2.4",
+    "updated_at": "2026-02-09T12:53:52Z"
+  },
+  {
+    "additions": 2,
+    "author": "redpanda1995",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43821",
+    "created_at": "2026-02-07T19:04:11Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43821/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43821",
+    "labels": [],
+    "merged": true,
+    "number": 43821,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix typo in quantization_operations in PEFT integrations",
+    "updated_at": "2026-02-16T17:59:57Z"
+  },
+  {
+    "additions": 10,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Implemented a fix that applies the straight-through estimator to each latent chunk before `out_proj`, following the straight-through estimator pattern use\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43820",
+    "created_at": "2026-02-07T14:44:51Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43820/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43820",
+    "labels": [],
+    "merged": true,
+    "number": 43820,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(models): Apply STE in Dac.from_latents to match the forward pass",
+    "updated_at": "2026-04-18T08:33:47Z"
+  },
+  {
+    "additions": 135,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "This is a follow-up work on trying to fix flakiness. Adding Global Deterministic Testing # Deterministic Testing Infrastructure - Summary N.B. this is for CPU-only tests ## Problem The test suite has flaky tests that failed intermittently\u2026",
+    "changed_files": 13,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43817",
+    "created_at": "2026-02-07T08:33:51Z",
+    "deletions": 57,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43817/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43817",
+    "labels": [],
+    "merged": false,
+    "number": 43817,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(flaky): use a fixture for `set_seed` and single-threading [WIP]",
+    "updated_at": "2026-03-19T10:29:23Z"
+  },
+  {
+    "additions": 16,
+    "author": "thecaptain789",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #43698 Adds support for run resumption in the SwanLab integration via two new environment variables: - `SWANLAB_RUN_ID`: Unique identifier for the run - `SWANLAB_RESUME`: Controls resumption behavior (`must`, `allow`, `never`, `auto`\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43698-3",
+    "cluster_ids": [
+      "cluster-43698-3"
+    ],
+    "cluster_role": "member",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43816",
+    "created_at": "2026-02-07T03:55:02Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43816/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43816",
+    "labels": [],
+    "merged": false,
+    "number": 43816,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix: add id and resume parameters to SwanLab integration",
+    "updated_at": "2026-02-09T09:44:45Z"
+  },
+  {
+    "additions": 1,
+    "author": "zyoraclub",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43815",
+    "created_at": "2026-02-07T03:24:08Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43815/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43815",
+    "labels": [],
+    "merged": false,
+    "number": 43815,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Add missing import statement for os module",
+    "updated_at": "2026-02-09T06:28:30Z"
+  },
+  {
+    "additions": 0,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This EncoderDecoderCache.batch_split is a remaining method from previous refactoring and is not used by other code.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43814",
+    "created_at": "2026-02-07T03:04:26Z",
+    "deletions": 14,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43814/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43814",
+    "labels": [],
+    "merged": true,
+    "number": 43814,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Delete batch_split from EncoderDecoderCache",
+    "updated_at": "2026-02-09T13:14:21Z"
+  },
+  {
+    "additions": 5,
+    "author": "tobyliu2004",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #43784 Fixes `NameError: name 'nn' is not defined` when importing transformers with PyTorch < 2.4. ## The Issue When PyTorch < 2.4 is detected, transformers disables PyTorch by making `is_torch_available()` re\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43812",
+    "created_at": "2026-02-06T19:53:01Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43812/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43812",
+    "labels": [],
+    "merged": false,
+    "number": 43812,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix: Import torch.nn for type hints even when PyTorch is disabled",
+    "updated_at": "2026-02-07T09:35:29Z"
+  },
+  {
+    "additions": 6,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The test `tests/models/pe_audio_video/test_modeling_pe_audio_video.py::PeAudioVideoEncoderTest::test_model_forward_default_config_values` is flaky in the CI. In local testing, it failed in 5 out of 100 runs for me. After some digging, I fi\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43811",
+    "created_at": "2026-02-06T19:46:15Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43811/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43811",
+    "labels": [],
+    "merged": true,
+    "number": 43811,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix video interpolation in pe_audio_video",
+    "updated_at": "2026-02-09T12:51:52Z"
+  },
+  {
+    "additions": 10,
+    "author": "michaelfeil",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43810",
+    "created_at": "2026-02-06T18:24:13Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43810/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43810",
+    "labels": [],
+    "merged": false,
+    "number": 43810,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add bidirectional attention to qwen and llama configs",
+    "updated_at": "2026-02-07T17:40:34Z"
+  },
+  {
+    "additions": 90,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "Trainer is going to be heavily refactored over the coming weeks/months (see #43595). To avoid the recurring pattern: PR merges in `transformers` \u2192 TRL CI breaks \u2192 follow-up PR in `transformers` to fix something we could have caught earlier\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43809",
+    "created_at": "2026-02-06T17:05:13Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43809/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43809",
+    "labels": [],
+    "merged": true,
+    "number": 43809,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add TRL CI bot workflow to trigger tests on PR comments",
+    "updated_at": "2026-02-06T17:36:59Z"
+  },
+  {
+    "additions": 23,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "expands the base classes, inference, and training sections as these are quite important and will give readers a better idea of what's available",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43808",
+    "created_at": "2026-02-06T16:50:11Z",
+    "deletions": 13,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43808/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43808",
+    "labels": [],
+    "merged": true,
+    "number": 43808,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[docs] reveal more in toctree",
+    "updated_at": "2026-02-06T22:29:26Z"
+  },
+  {
+    "additions": 319,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR simplifies Trainer `__init__`: - Quantization validation extracted - PEFT unwrapping deduplicated - Liger Kernel extracted \u2014 apply_liger_kernel - Label smoother simplified - Validations grouped \u2014 `_validate_\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43807",
+    "created_at": "2026-02-06T16:18:12Z",
+    "deletions": 250,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43807/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43807",
+    "labels": [],
+    "merged": true,
+    "number": 43807,
+    "review_comments_count": 12,
+    "state": "closed",
+    "title": "Refactor trainer init",
+    "updated_at": "2026-02-10T15:00:21Z"
+  },
+  {
+    "additions": 915,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR updates TrainingArguments in several places: - More tests - Simplify the post_init - update and reorder __init__ For the review, just check that the post_init is correctly modified",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43806",
+    "created_at": "2026-02-06T16:16:45Z",
+    "deletions": 639,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43806/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43806",
+    "labels": [],
+    "merged": true,
+    "number": 43806,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Update TrainingArguments",
+    "updated_at": "2026-02-24T12:32:04Z"
+  },
+  {
+    "additions": 31,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The difference check was returning False 40% of the times and was reproducible locally tested with ``` pytest -svx tests/models/ernie4_5_moe/test_modeling_ernie4_5_moe.py -k test_load_balancing_loss --flake-finder `\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43804",
+    "created_at": "2026-02-06T14:24:50Z",
+    "deletions": 11,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43804/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43804",
+    "labels": [],
+    "merged": true,
+    "number": 43804,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix(flaky): Different approach to make sure loss exists",
+    "updated_at": "2026-02-20T07:45:27Z"
+  },
+  {
+    "additions": 2,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, I merged #43772 after #43400 which caused this mismatch - causes red CI",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43803",
+    "created_at": "2026-02-06T14:14:58Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43803/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43803",
+    "labels": [],
+    "merged": true,
+    "number": 43803,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`Repo Consistency`] Fix rms norm",
+    "updated_at": "2026-02-06T14:59:20Z"
+  },
+  {
+    "additions": 7,
+    "author": "Sankalpkumarsingh1234",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR adds a short real-world use case section to help users understand practical applications of text summarization. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, y\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43802",
+    "created_at": "2026-02-06T13:41:03Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43802/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43802",
+    "labels": [],
+    "merged": false,
+    "number": 43802,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add real-world use case section for text summarization",
+    "updated_at": "2026-02-09T12:14:45Z"
+  },
+  {
+    "additions": 8,
+    "author": "robell",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "When using the torch.export path with this model we hit an issue on correction_coefs being modified with unknown side-effects the dynamo can't catch. So it lifts this tensor unnecessarily, and fails assuming mutation is needed RuntimeError\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43801",
+    "created_at": "2026-02-06T13:26:58Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43801/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43801",
+    "labels": [],
+    "merged": true,
+    "number": 43801,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix lifted_tensor in Gemma3n export which dynamo can't reason about",
+    "updated_at": "2026-02-12T09:58:33Z"
+  },
+  {
+    "additions": 13182,
+    "author": "harshang03",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR makes remove_unused_columns=True customizable for trainers that transform dataset columns before calling the model. It introduces an optional signature_columns argument on Trainer to control which dataset co\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43800",
+    "created_at": "2026-02-06T13:07:32Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43800/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43800",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43800,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add new Trainer utilities and documentation",
+    "updated_at": "2026-02-06T13:42:08Z"
+  },
+  {
+    "additions": 14,
+    "author": "kashif",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? With this change: https://github.com/huggingface/accelerate/pull/3916 there is no torch device mesh when sp_backend=\"deepspeed\", and transformers currently assumes it exists. That\u2019s why you get: ``` sp_group = self.\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43799",
+    "created_at": "2026-02-06T13:03:20Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43799/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43799",
+    "labels": [],
+    "merged": true,
+    "number": 43799,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "[Trainer] use deepspeed SP process group when Accelerate doesn\u2019t build a mesh",
+    "updated_at": "2026-02-06T16:15:28Z"
+  },
+  {
+    "additions": 15,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **BLOOM:** `batch_encode_plus()` method was removed from `PreTrainedTokenizerBase` in commit 05c0e1d390 (the \"rm slow tokenizers\" refactor, #4093\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43798",
+    "created_at": "2026-02-06T13:00:13Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43798/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43798",
+    "labels": [],
+    "merged": true,
+    "number": 43798,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(testing): Fix BLOOM tokenizer, CLAP audio features, and CLVP text tester usage in tests",
+    "updated_at": "2026-04-18T08:34:42Z"
+  },
+  {
+    "additions": 88,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "Let's see if we can fix that flaky",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43797",
+    "created_at": "2026-02-06T12:14:02Z",
+    "deletions": 31,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43797/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43797",
+    "labels": [],
+    "merged": false,
+    "number": 43797,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "preventing I/O errors on closed streams in the `cli` helper",
+    "updated_at": "2026-02-06T15:03:26Z"
+  },
+  {
+    "additions": 37,
+    "author": "hemanth678901-stack",
+    "author_association": "NONE",
+    "body_excerpt": "This PR clarifies a common misconception around GPU memory usage during training, particularly when using the Adafactor optimizer with DeepSpeed. Several users expect Adafactor to always reduce peak GPU memory due to reduced optimizer stat\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43796",
+    "created_at": "2026-02-06T11:55:33Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43796/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43796",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43796,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Clarify GPU memory expectations when using Adafactor with DeepSpeed",
+    "updated_at": "2026-02-06T13:37:49Z"
+  },
+  {
+    "additions": 1464,
+    "author": "liu-jiaxuan",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingfa\u2026",
+    "changed_files": 20,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43795",
+    "created_at": "2026-02-06T11:22:56Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43795/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43795",
+    "labels": [],
+    "merged": false,
+    "number": 43795,
+    "review_comments_count": 38,
+    "state": "closed",
+    "title": "[Model] Add PP-OCRv5_server_rec Model Support",
+    "updated_at": "2026-03-18T16:33:35Z"
+  },
+  {
+    "additions": 178,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "This patch aims to reduce flakiness in CI tests. We identified two causes of nondeterministic behavior: - Some tests were not using a fixed RNG seed, which reduced determinism. - The cli tests were occasionally triggering I/O errors due to\u2026",
+    "changed_files": 28,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43794",
+    "created_at": "2026-02-06T09:54:59Z",
+    "deletions": 114,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43794/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43794",
+    "labels": [],
+    "merged": true,
+    "number": 43794,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Enforce manual seed to reduce flakiness",
+    "updated_at": "2026-02-06T16:30:31Z"
+  },
+  {
+    "additions": 5225,
+    "author": "liu-jiaxuan",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingfa\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43793",
+    "created_at": "2026-02-06T09:34:48Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43793/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43793",
+    "labels": [],
+    "merged": false,
+    "number": 43793,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "[Model] Add PP-OCRV5_mobile_rec Model Support",
+    "updated_at": "2026-03-19T10:50:04Z"
+  },
+  {
+    "additions": 25,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "When saving a tokenizer to a local directory and reloading it, the tokenizer type could change to an incorrect class (or fall back to TokenizersBackend) if the directory name contained a model type substring. Example: ```python tokenizer =\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43791",
+    "created_at": "2026-02-06T08:03:34Z",
+    "deletions": 24,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43791/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43791",
+    "labels": [],
+    "merged": true,
+    "number": 43791,
+    "review_comments_count": 12,
+    "state": "closed",
+    "title": "fix: Prevent AutoTokenizer type mismatch from directory name substrin\u2026",
+    "updated_at": "2026-02-17T13:23:42Z"
+  },
+  {
+    "additions": 6,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? So CircleCI will run with torch 2.10 CI is \u2705 see [here](https://app.circleci.com/pipelines/github/huggingface/transformers/163136/workflows/58ba2617-481b-4b4e-a456-d327806af088)",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43790",
+    "created_at": "2026-02-06T07:58:04Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43790/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43790",
+    "labels": [],
+    "merged": true,
+    "number": 43790,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "unpin torch for CircleCI",
+    "updated_at": "2026-02-06T11:48:30Z"
+  },
+  {
+    "additions": 268,
+    "author": "Jereshea",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR introduces a `token_latency` feature to the generation. When enabled, it allows users to capture the execution time of each generation step, facilitating the measurement of key performance metrics like **Tim\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43789",
+    "created_at": "2026-02-06T07:04:37Z",
+    "deletions": 52,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43789/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43789",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43789,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Support to measure TTFT & TPOT",
+    "updated_at": "2026-02-06T13:32:43Z"
+  },
+  {
+    "additions": 18,
+    "author": "LynchXLQ",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #43698 This PR adds support for `id` and `resume` parameters in SwanLabCallback, enabling experiment continuation when resuming training. ### Changes - Added `SWANLAB_RUN_ID` environment variable support for\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43788",
+    "created_at": "2026-02-06T02:29:19Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43788/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43788",
+    "labels": [],
+    "merged": false,
+    "number": 43788,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add id and resume support for SwanLab integration",
+    "updated_at": "2026-02-09T16:18:39Z"
+  },
+  {
+    "additions": 50,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR removes or replaces code to use APIs from PyTorch 2.4+.",
+    "changed_files": 24,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43787",
+    "created_at": "2026-02-06T02:11:17Z",
+    "deletions": 95,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43787/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43787",
+    "labels": [],
+    "merged": true,
+    "number": 43787,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Remove unnecessary code or checks for PT 2.4+",
+    "updated_at": "2026-02-12T01:59:20Z"
+  },
+  {
+    "additions": 32,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Many users and tools use the OpenAI chat format for image inputs: `{\"type\": \"image_url\", \"image_url\": {\"url\": \"https://example.com/image.jpg\"}}` However, apply_chat_template only recognizes the HuggingFace-native format: `{\"type\": \"image\",\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43786",
+    "created_at": "2026-02-06T01:50:42Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43786/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43786",
+    "labels": [],
+    "merged": true,
+    "number": 43786,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add openAI style `image_url` content support in `apply_chat_template`",
+    "updated_at": "2026-04-09T02:33:09Z"
+  },
+  {
+    "additions": 85,
+    "author": "MengAiDev",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "- Add _is_hf_initialized flag in _load_parameter_into_model to prevent unnecessary random initialization - Skip state_dict loading for non-rank0 processes when FSDP is enabled to avoid wasting CPU RAM - This fixes the issue where all ranks\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43785",
+    "created_at": "2026-02-06T00:46:08Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43785/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43785",
+    "labels": [],
+    "merged": false,
+    "number": 43785,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Fix FSDP_CPU_RAM_EFFICIENT_LOADING (#43749)",
+    "updated_at": "2026-02-11T11:17:53Z"
+  },
+  {
+    "additions": 55,
+    "author": "chry-santhemum",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? EDIT: see the discussion here https://github.com/huggingface/transformers/pull/43783#discussion_r2795768079 Add a check for deepspeed_zero3 in `_init_weights` for `nn.Embedding`. When initializing weights for `nn.Em\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43783",
+    "created_at": "2026-02-05T22:42:49Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43783/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43783",
+    "labels": [],
+    "merged": false,
+    "number": 43783,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "fix: ZeRO-3 crash for non-pretrained BERT in _init_weights",
+    "updated_at": "2026-02-18T03:24:28Z"
+  },
+  {
+    "additions": 156,
+    "author": "gabe-l-hart",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Only overwrite the pretrained_model_name_or_path if needed with adapter The check is based on the assumption that if the current value is a path on disk and there is a `config.json` present in that path, the path points to a full model che\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43781",
+    "created_at": "2026-02-05T21:39:09Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43781/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43781",
+    "labels": [],
+    "merged": true,
+    "number": 43781,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "fix: Load model with co-located adapter from local path (Granite Speech)",
+    "updated_at": "2026-03-05T14:14:43Z"
+  },
+  {
+    "additions": 2,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "The changes in #43711 caused the model to be never prepared when using DeepSpeed. When training you hit for example: ``` [rank0]: Traceback (most recent call last): [rank0]: File \"/fsx/qgallouedec/trl/trl/scripts/grpo.py\", line 193, in <mo\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43780",
+    "created_at": "2026-02-05T20:59:07Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43780/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43780",
+    "labels": [],
+    "merged": true,
+    "number": 43780,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix DeepSpeed model preparation logic in Trainer class",
+    "updated_at": "2026-02-06T19:20:01Z"
+  },
+  {
+    "additions": 3,
+    "author": "surya10602",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# Summary This PR updates `SwanLabCallback` to accept `**kwargs` in its `__init__` method and passes them to `swanlab.init()` during setup. Previously, the SwanLab integration did not expose important initialization arguments like `experim\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43698-3",
+    "cluster_ids": [
+      "cluster-43698-3"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43779",
+    "created_at": "2026-02-05T20:57:14Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43779/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43779",
+    "labels": [],
+    "merged": false,
+    "number": 43779,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "SwanLab: Add support for id and resume arguments in SwanLabCallback",
+    "updated_at": "2026-02-09T09:43:49Z"
+  },
+  {
+    "additions": 93,
+    "author": "kevinli573",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Initializes dt_bias in the mixer class for both Mamba-1 and Mamba-2 (also initialized in PreTrainedModel class for meta device test). Addresses [#43717](https://github.com/huggingface/transformers/issues/43717)",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43778",
+    "created_at": "2026-02-05T20:38:41Z",
+    "deletions": 81,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43778/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43778",
+    "labels": [],
+    "merged": true,
+    "number": 43778,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Mamba-1/-2 init weights in mixer class",
+    "updated_at": "2026-03-03T02:27:54Z"
+  },
+  {
+    "additions": 2,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43777",
+    "created_at": "2026-02-05T19:25:53Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43777/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43777",
+    "labels": [],
+    "merged": true,
+    "number": 43777,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Bump dev version",
+    "updated_at": "2026-02-06T08:03:39Z"
+  },
+  {
+    "additions": 1598,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR updates and simplifies tests for trainer. We never really had any issues with those tests, just cleaning a bit. 1) `tests/trainer/test_data_collator.py` - Restructured from 4 large classes (PyTorch/NumPy \u00d7 I\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43776",
+    "created_at": "2026-02-05T18:44:30Z",
+    "deletions": 1907,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43776/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43776",
+    "labels": [],
+    "merged": true,
+    "number": 43776,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor trainer data_collator and callbacks tests",
+    "updated_at": "2026-02-06T16:06:16Z"
+  },
+  {
+    "additions": 154,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #43688 The auxiliary load balancing loss in MoE models was not correctly normalized when `top_k > 1`. The `tokens_per_expert` distribution (f_i) was summing to K instead of 1, while `router_prob_per_expert` (P_i) sums to 1\u2026",
+    "changed_files": 22,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43775",
+    "created_at": "2026-02-05T16:48:34Z",
+    "deletions": 66,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43775/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43775",
+    "labels": [],
+    "merged": false,
+    "number": 43775,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix(moe): normalize auxiliary loss by top_k for correct load balancing",
+    "updated_at": "2026-02-05T18:14:28Z"
+  },
+  {
+    "additions": 930,
+    "author": "mbtariq82",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds a new feature to TrainingArguments and Trainer: activation_offloading. When enabled, intermediate activations are offloaded to CPU during training to reduce GPU memory usage, which is useful for large m\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43774",
+    "created_at": "2026-02-05T15:52:16Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43774/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43774",
+    "labels": [],
+    "merged": false,
+    "number": 43774,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add activation offloading to trainer",
+    "updated_at": "2026-03-04T20:47:27Z"
+  },
+  {
+    "additions": 8,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? We had to do this twice already!",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43773",
+    "created_at": "2026-02-05T15:38:15Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43773/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43773",
+    "labels": [],
+    "merged": true,
+    "number": 43773,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix-release-ubild",
+    "updated_at": "2026-02-06T08:56:52Z"
+  },
+  {
+    "additions": 252,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, this led to weird dependencies where modeling files used direct imports",
+    "changed_files": 83,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43772",
+    "created_at": "2026-02-05T15:29:21Z",
+    "deletions": 216,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43772/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43772",
+    "labels": [],
+    "merged": true,
+    "number": 43772,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "[`Modular Dependencies`] Fixup qwen rms norms",
+    "updated_at": "2026-02-06T12:30:17Z"
+  },
+  {
+    "additions": 1525,
+    "author": "leoneperdigao",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary <!-- Add a brief summary of changes --> ## Related Issue Fixes #40170 **Issue:** Add MXFP4 MoE/attention backward kernels **URL:** https://github.com/huggingface/transformers/issues/40170 ## Problem ## A Call To Action! The Hugg\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43771",
+    "created_at": "2026-02-05T15:12:21Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43771/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43771",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43771,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: Add MXFP4 MoE/attention backward kernels",
+    "updated_at": "2026-03-24T14:14:44Z"
+  },
+  {
+    "additions": 47,
+    "author": "lordaarush",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Removes the unconditional `self.state.train_batch_size = self._train_batch_size` assignment that was causing issues when resuming from checkpoint with different batch configurations. The `train_batch_size` should on\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43770",
+    "created_at": "2026-02-05T14:25:36Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43770/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43770",
+    "labels": [],
+    "merged": true,
+    "number": 43770,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Remove unconditional train_batch_size assignment",
+    "updated_at": "2026-02-06T14:47:16Z"
+  },
+  {
+    "additions": 3950,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds voxtral realtime! ## benchmarks Using [this reproducer](https://gist.github.com/eustlb/367f062f77a5971291fb5350763bea8d), I've ran WER evals on ami, librispeech and fleurs, with results Dataset | Original (vllm\u2026",
+    "changed_files": 21,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43769",
+    "created_at": "2026-02-05T14:17:52Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43769/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43769",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": true,
+    "number": 43769,
+    "review_comments_count": 39,
+    "state": "closed",
+    "title": "Add Voxtral Realtime",
+    "updated_at": "2026-02-26T10:18:32Z"
+  },
+  {
+    "additions": 87,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Helps vLLM to bump to v5",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43768",
+    "created_at": "2026-02-05T14:04:02Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43768/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43768",
+    "labels": [],
+    "merged": true,
+    "number": 43768,
+    "review_comments_count": 10,
+    "state": "closed",
+    "title": "Fix init weights in remote code",
+    "updated_at": "2026-02-17T14:45:18Z"
+  },
+  {
+    "additions": 850,
+    "author": "XingweiDeng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43767",
+    "created_at": "2026-02-05T13:54:13Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43767/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43767",
+    "labels": [],
+    "merged": true,
+    "number": 43767,
+    "review_comments_count": 116,
+    "state": "closed",
+    "title": "[Model] Add PP-Chart2Table Model Support",
+    "updated_at": "2026-03-19T19:12:37Z"
+  },
+  {
+    "additions": 11,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "`convert_rope_params_to_dict` assumes that `rope_theta` and `partial_rotary_factor` will be present in `kwargs`. This is only true if these parameters are not explicit arguments of the config class's `__init__` method. i.e. `convert_rope_p\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43766",
+    "created_at": "2026-02-05T12:28:26Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43766/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43766",
+    "labels": [],
+    "merged": true,
+    "number": 43766,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix `convert_rope_params_to_dict` so it uses `rope_theta` from the config",
+    "updated_at": "2026-02-06T10:45:42Z"
+  },
+  {
+    "additions": 728,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? ## The problem Currently, `check_model_inputs` needs to iterate on all modules and monkey-patch all needed submodule's `forward` on-the-fly, before restoring them afterwards. This brings 2 big issues: - It's NOT thr\u2026",
+    "changed_files": 121,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43765",
+    "created_at": "2026-02-05T12:24:02Z",
+    "deletions": 565,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43765/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43765",
+    "labels": [],
+    "merged": true,
+    "number": 43765,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "[core] Faster and thread-safe `check_model_inputs` implementation",
+    "updated_at": "2026-02-20T08:32:16Z"
+  },
+  {
+    "additions": 18,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Background: PR https://github.com/huggingface/transformers/pull/43672 This PR modifies the default attention implementation in `modernbert`, switching away from `FlashAttention`. It also restores the related tests that were previously over\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43764",
+    "created_at": "2026-02-05T11:02:05Z",
+    "deletions": 364,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43764/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43764",
+    "labels": [],
+    "merged": true,
+    "number": 43764,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": ":rotating_light: Modify ModernBERT's default attention implementation to stop using FA",
+    "updated_at": "2026-02-06T11:26:44Z"
+  },
+  {
+    "additions": 100,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Tweaks to `AGENTS.md` to make AI tools more efficient",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43763",
+    "created_at": "2026-02-05T10:25:59Z",
+    "deletions": 26,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43763/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43763",
+    "labels": [],
+    "merged": true,
+    "number": 43763,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Improved agents",
+    "updated_at": "2026-02-06T08:29:53Z"
+  },
+  {
+    "additions": 41,
+    "author": "molbap",
+    "author_association": "MEMBER",
+    "body_excerpt": "In case of module reloading, we currently lose tracking hooks for hidden states and attentions . Widening the matching condition a bit. Should fix https://github.com/huggingface/transformers/issues/43761, also mentioned in https://github.c\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43762",
+    "created_at": "2026-02-05T10:15:43Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43762/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43762",
+    "labels": [],
+    "merged": false,
+    "number": 43762,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": " Widen match condition for `_can_record_outputs`",
+    "updated_at": "2026-02-06T16:25:16Z"
+  },
+  {
+    "additions": 747,
+    "author": "Nitin75408",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes issue #43746: when loading PEFT adapters from local directories, the code no longer overrides the local path with the hub path from the adapter config, preventing unnecessary hub downloads. Centralizes path re\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43760",
+    "created_at": "2026-02-05T09:31:54Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43760/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43760",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43760,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fixed Models with PEFT adapters won't load from local checkpoints issue",
+    "updated_at": "2026-02-05T13:00:37Z"
+  },
+  {
+    "additions": 4,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@IlyasMoutawwakil Hi, can you help review? Thx!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43759",
+    "created_at": "2026-02-05T08:42:24Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43759/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43759",
+    "labels": [],
+    "merged": true,
+    "number": 43759,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add use_deterministic to guarantee the consistency for youtu-llm model",
+    "updated_at": "2026-04-02T03:21:50Z"
+  },
+  {
+    "additions": 41,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh can you help review? Thx!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43758",
+    "created_at": "2026-02-05T08:11:34Z",
+    "deletions": 11,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43758/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43758",
+    "labels": [],
+    "merged": true,
+    "number": 43758,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "stablize the expect output for eomt_dinov3 model test",
+    "updated_at": "2026-04-09T02:33:08Z"
+  },
+  {
+    "additions": 15,
+    "author": "TheSanjBot",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR avoids a hard failure when loading GGUF models that declare the `gpt-oss` architecture. Currently, such models raise a `ValueError` during GGUF config loading. This change maps `gpt-oss` to the closest suppo\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43366-4",
+    "cluster_ids": [
+      "cluster-43366-4"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43757",
+    "created_at": "2026-02-05T07:56:19Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43757/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43757",
+    "labels": [],
+    "merged": false,
+    "number": 43757,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Avoid hard failure for gpt-oss GGUF architecture by falling back to g\u2026",
+    "updated_at": "2026-02-05T17:42:11Z"
+  },
+  {
+    "additions": 27,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refines the test file of ernie4_5_vl_moe model, mainly 2 things: 1. avoid hard cord for expected image token count 2. add padding_side=\"left\" for bs > 1 inference",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43755",
+    "created_at": "2026-02-05T05:25:39Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43755/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43755",
+    "labels": [],
+    "merged": true,
+    "number": 43755,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Ernie4 5 vl moe",
+    "updated_at": "2026-04-13T02:40:24Z"
+  },
+  {
+    "additions": 0,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? https://github.com/pytorch/pytorch/issues/112577 was fixed in torch 2.2. Let's remove the workarounds and see what happen in the tests.",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43754",
+    "created_at": "2026-02-05T03:13:15Z",
+    "deletions": 52,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43754/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43754",
+    "labels": [],
+    "merged": true,
+    "number": 43754,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Remove SDPA workarounds for torch 2.4+",
+    "updated_at": "2026-02-05T14:44:16Z"
+  },
+  {
+    "additions": 1,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Update KERNELS_MIN_VERSION to 0.10.2 to be the same as setup.py",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43753",
+    "created_at": "2026-02-05T03:02:43Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43753/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43753",
+    "labels": [],
+    "merged": true,
+    "number": 43753,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update KERNELS_MIN_VERSION to 0.10.2 to be the same as setup.py",
+    "updated_at": "2026-02-11T12:24:15Z"
+  },
+  {
+    "additions": 0,
+    "author": "bhargav-patel-29",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for **Param-2-17B-MoE-A2.4B**, a large-scale Mixture-of-Experts (MoE) causal language model. Param-2-17B-MoE-A2.4B uses a **Hybrid Dense + MoE architecture** with 17B total parameters while acti\u2026",
+    "changed_files": 0,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43752",
+    "created_at": "2026-02-05T02:43:45Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43752/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43752",
+    "labels": [],
+    "merged": false,
+    "number": 43752,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Param2moe v4.52.3",
+    "updated_at": "2026-02-10T10:28:02Z"
+  },
+  {
+    "additions": 152,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix more warnings in ruff 0.15 from existing enabled rules. Almost all changes are about turning `if A then A else B` into `A or B`.",
+    "changed_files": 40,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43751",
+    "created_at": "2026-02-05T02:08:42Z",
+    "deletions": 98,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43751/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43751",
+    "labels": [],
+    "merged": false,
+    "number": 43751,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix ruff warnings",
+    "updated_at": "2026-03-24T03:12:36Z"
+  },
+  {
+    "additions": 40,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "enable tp in benchmark_v2, to ensure large model could run.",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43750",
+    "created_at": "2026-02-05T01:53:45Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43750/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43750",
+    "labels": [],
+    "merged": true,
+    "number": 43750,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "enable tp for benchmark",
+    "updated_at": "2026-03-19T16:23:37Z"
+  },
+  {
+    "additions": 1372,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "Improve types parsing to handle \"|\" syntax and other edge cases, add much needed tests Cc @Cyrilvallez ;)",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43748",
+    "created_at": "2026-02-04T22:08:11Z",
+    "deletions": 227,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43748/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43748",
+    "labels": [],
+    "merged": true,
+    "number": 43748,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "[auto_docstring] Improve typing parsing and add tests",
+    "updated_at": "2026-02-25T18:42:39Z"
+  },
+  {
+    "additions": 52,
+    "author": "colldata79",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Title: Remove CompressedLinear support for compressed-tensors > 0.13 Body: ## What does this PR do? Prepares transformers for the removal of `CompressedLinear` from compressed-tensors (v0.14+). Users should now call `model.dequantize()` af\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43747",
+    "created_at": "2026-02-04T21:25:10Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43747/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43747",
+    "labels": [],
+    "merged": false,
+    "number": 43747,
+    "review_comments_count": 7,
+    "state": "open",
+    "title": "Remove CompressedLinear support for compressed-tensors > 0.13",
+    "updated_at": "2026-02-05T18:30:27Z"
+  },
+  {
+    "additions": 74,
+    "author": "antznette1",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #43701 When resuming training via [Trainer.train(resume_from_checkpoint=...)](cci:1://file:///c:/Users/brass/OneDrive/Desktop/Work/clients/Anthonette/transformers/src/transformers/trainer.py:2070:4-2175:13), t\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43745",
+    "created_at": "2026-02-04T17:27:34Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43745/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43745",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43745,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix Trainer resume_from_checkpoint to apply checkpoint conversion mappings",
+    "updated_at": "2026-02-05T12:59:43Z"
+  },
+  {
+    "additions": 635,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR updates a few minor things from trainer. Some of them are breaking but I think it should be safe to do as I don't think anyone is subclassing them or using them separately. Breaking: - move `propagate_args_t\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43744",
+    "created_at": "2026-02-04T17:16:20Z",
+    "deletions": 583,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43744/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43744",
+    "labels": [],
+    "merged": true,
+    "number": 43744,
+    "review_comments_count": 9,
+    "state": "closed",
+    "title": "Minor changes trainer",
+    "updated_at": "2026-02-17T11:47:46Z"
+  },
+  {
+    "additions": 4124,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "**Update:** - improve sanitization of code pre-embedding - strip dtypes, args, params, etc. - filter self-contained model matches - improve summary (see below) - create prompt .md to create a modular file based on detector's results, that\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43743",
+    "created_at": "2026-02-04T17:01:41Z",
+    "deletions": 381,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43743/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43743",
+    "labels": [],
+    "merged": false,
+    "number": 43743,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Modular playground",
+    "updated_at": "2026-04-13T11:55:21Z"
+  },
+  {
+    "additions": 7,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "This time I'm focusing on `_get_component_class_from_processor` in the tests. I'm not sure if the way this operates can sometimes result in Tokenizer classes going missing.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43741",
+    "created_at": "2026-02-04T16:12:32Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43741/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43741",
+    "labels": [],
+    "merged": false,
+    "number": 43741,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Make the CI Green Round 3",
+    "updated_at": "2026-02-04T17:11:01Z"
+  },
+  {
+    "additions": 3,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR removes more old PT code. ## Who can review? Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR. <!-- Your PR\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43740",
+    "created_at": "2026-02-04T15:26:39Z",
+    "deletions": 30,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43740/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43740",
+    "labels": [],
+    "merged": false,
+    "number": 43740,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Remove more PyTorch < 2.4 code",
+    "updated_at": "2026-02-05T00:57:56Z"
+  },
+  {
+    "additions": 27,
+    "author": "Rayyan-Oumlil",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43698 ## Summary When using `Trainer` with SwanLab and resuming training (`trainer.train(resume_from_checkpoint=...)`), the integration previously had no way to pass `id` and `resume` to `swanlab.init()`, so a new experiment was alw\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43739",
+    "created_at": "2026-02-04T15:11:22Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43739/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43739",
+    "labels": [],
+    "merged": false,
+    "number": 43739,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "SwanLab: add id and resume support for resuming runs (fixes #43698)",
+    "updated_at": "2026-02-21T17:54:07Z"
+  },
+  {
+    "additions": 708,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR do the following: - Created src/transformers/trainer_optimizer.py with: - Helper functions for each optimizer category - Updated `Trainer.get_optimizer_cls_and_kwargs` - Simplify tests",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43738",
+    "created_at": "2026-02-04T15:09:30Z",
+    "deletions": 767,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43738/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43738",
+    "labels": [],
+    "merged": true,
+    "number": 43738,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "[Trainer] Move optimizer cls init to trainer_optimizer.py",
+    "updated_at": "2026-02-10T14:26:45Z"
+  },
+  {
+    "additions": 197,
+    "author": "riccardofelluga",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds support for PyTorch's `associative_scan` higher-order op in Mamba and RecurrentGemma models, enabling parallel computation of the recurrent scan during `torch.compile()`. - Uses `associative_scan` when availabl\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 40,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43737",
+    "created_at": "2026-02-04T15:06:43Z",
+    "deletions": 31,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43737/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43737",
+    "labels": [],
+    "merged": true,
+    "number": 43737,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Use associative scan HOP mamba recurrentgemma",
+    "updated_at": "2026-02-25T17:06:20Z"
+  },
+  {
+    "additions": 155,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR do the following: - Extract `sorted_checkpoints` and `rotate_checkpoints` from Trainer class to standalone functions in `trainer_utils.py` - Rename `sorted_checkpoints` to `sort_checkpoints` - Simplify imple\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43736",
+    "created_at": "2026-02-04T14:28:04Z",
+    "deletions": 67,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43736/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43736",
+    "labels": [],
+    "merged": true,
+    "number": 43736,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "[Trainer] Move sort and rotate checkpoints to standalone functions",
+    "updated_at": "2026-02-05T15:26:50Z"
+  },
+  {
+    "additions": 10,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "One of the recurring issues we see in the CI is tokenizers occasionally being initialized as instances of `TokenizersBackend` rather than the actual tokenizer class. The error crops up all over, and is annoyingly hard to track down because\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43735",
+    "created_at": "2026-02-04T14:23:46Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43735/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43735",
+    "labels": [],
+    "merged": false,
+    "number": 43735,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Chase TokenizersBackend issue",
+    "updated_at": "2026-02-06T15:18:10Z"
+  },
+  {
+    "additions": 2643,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title. lays ground to unifying 3D position ids in qwen-style VLMs PR adds a single entrypoint to prepare position ids in `GenerationMixin` which models can override if needed (qwen-vl for ex). This allow user\u2026",
+    "changed_files": 36,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 25,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43734",
+    "created_at": "2026-02-04T11:12:26Z",
+    "deletions": 2693,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43734/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43734",
+    "labels": [],
+    "merged": true,
+    "number": 43734,
+    "review_comments_count": 34,
+    "state": "closed",
+    "title": "Prepare and keep track of position ids in `generate`",
+    "updated_at": "2026-02-12T09:57:20Z"
+  },
+  {
+    "additions": 3,
+    "author": "nesjett",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# Summary This PR removes the deprecated tokenizer parameter from the Quicktour documentation examples. As of the v5.0.0 release, the tokenizer argument was officially replaced from the Trainer constructor. Currently, the documentation exa\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43733",
+    "created_at": "2026-02-04T10:28:20Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43733/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43733",
+    "labels": [],
+    "merged": true,
+    "number": 43733,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Docs: fix Training step by removing tokenizer from trainer initialization",
+    "updated_at": "2026-02-04T16:49:59Z"
+  },
+  {
+    "additions": 37,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? We need to move `get_module_size_with_ties` in transformers as the min version of accelerate 1.1.0 don't have this function. It was introduced in 1.2.0 actually . Another solution would be change the minimum version\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43732",
+    "created_at": "2026-02-04T09:58:11Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43732/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43732",
+    "labels": [],
+    "merged": true,
+    "number": 43732,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix accelerate integration import ",
+    "updated_at": "2026-02-04T14:57:51Z"
+  },
+  {
+    "additions": 3,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43716, as per title We didn't see errors even if we test `bf16` because most backbone models (e.g. SigLIP) cast pixels to target dtype inside the model code.\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43731",
+    "created_at": "2026-02-04T09:27:59Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43731/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43731",
+    "labels": [],
+    "merged": true,
+    "number": 43731,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix dtype in image-text-to-text pipe",
+    "updated_at": "2026-02-04T13:54:52Z"
+  },
+  {
+    "additions": 127,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? ## Fix GPT-OSS ??? EP sharding works, the forward is broken for eager (does not work) and output giberissh for grouped_mm. Will work on a fix to have in the patch! ## Fix any TP model - fix get shard tensor - fix ge\u2026",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43730",
+    "created_at": "2026-02-04T09:23:04Z",
+    "deletions": 80,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43730/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43730",
+    "labels": [],
+    "merged": true,
+    "number": 43730,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Fix EP post merge",
+    "updated_at": "2026-02-05T15:23:41Z"
+  },
+  {
+    "additions": 15,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fixes a bug regarding the doc tests. It also enables the doc tests for Eomt-DINOv3. ## Summary The `SKIP_CUDA_DOCTEST` environment variable check in `src/transformers/testing_utils.py` has a bug that causes\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43729",
+    "created_at": "2026-02-04T09:14:42Z",
+    "deletions": 19,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43729/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43729",
+    "labels": [],
+    "merged": true,
+    "number": 43729,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[Doc tests] Fix bug",
+    "updated_at": "2026-02-13T09:17:44Z"
+  },
+  {
+    "additions": 10,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "In qwen2_5_vl model's test file, we use `Qwen/Qwen2.5-VL-7B-Instruct` model, which its default generation config's `do_sample` param is `True`. This causes some of the test cases are not steady, they will pass sometimes while sometimes get\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43728",
+    "created_at": "2026-02-04T07:55:12Z",
+    "deletions": 11,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43728/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43728",
+    "labels": [],
+    "merged": true,
+    "number": 43728,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add `do_sample=False` to qwen2_5_vl model tests to stablize the output",
+    "updated_at": "2026-04-02T03:21:53Z"
+  },
+  {
+    "additions": 3,
+    "author": "ExcitingFrog",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Restore backward-compatible TensorBoard logging directory behavior. The callback now prefers `TENSORBOARD_LOGGING_DIR` when set, but falls back to `TrainingArguments.logging_dir` if the env var is not provided, pres\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43727",
+    "created_at": "2026-02-04T07:11:33Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43727/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43727",
+    "labels": [],
+    "merged": false,
+    "number": 43727,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "feat: Add legacy TrainingArguments logging_dir fallback for TensorBoard",
+    "updated_at": "2026-03-31T06:49:59Z"
+  },
+  {
+    "additions": 1381,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "Let's make sure it works for decoder only first (We skip VLM + Encoder-decoder for now) Introduction, forward, backward, generation (with convert mapping triggering) test agains TP vs non-TP baseline ```python from transformers import Auto\u2026",
+    "changed_files": 77,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43722",
+    "created_at": "2026-02-03T23:08:40Z",
+    "deletions": 796,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43722/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43722",
+    "labels": [],
+    "merged": true,
+    "number": 43722,
+    "review_comments_count": 84,
+    "state": "closed",
+    "title": "\ud83d\udea8 fix + tests dense & MoE TP all reduce (decoder only)",
+    "updated_at": "2026-03-04T16:06:14Z"
+  },
+  {
+    "additions": 88,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? \u2192 Fixes `tests/models/bitnet/test_modeling_bitnet.py::BitNetIntegrationTest::test_model_generation && tests/models/bitnet/test_modeling_bitnet.py::BitNetIntegrationTest::test_model_logits`. \u2192 The reasoning; picked\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43721",
+    "created_at": "2026-02-03T20:01:08Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43721/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43721",
+    "labels": [],
+    "merged": true,
+    "number": 43721,
+    "review_comments_count": 8,
+    "state": "closed",
+    "title": "fix(models): Unpack BitNet packed weights to fix CI failure",
+    "updated_at": "2026-04-18T08:35:13Z"
+  },
+  {
+    "additions": 16,
+    "author": "i-pj",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #43698 - Add SWANLAB_RUN_ID env var to pass experiment ID for resumption - Add SWANLAB_RESUME env var to control resume mode - Default to resume='allow' when resume_from_checkpoint is set\" # What does this PR do? Fixes # (issue) ## B\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43719",
+    "created_at": "2026-02-03T18:59:54Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43719/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43719",
+    "labels": [],
+    "merged": true,
+    "number": 43719,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add id and resume support to SwanLab integration",
+    "updated_at": "2026-02-09T10:41:04Z"
+  },
+  {
+    "additions": 6,
+    "author": "lordaarush",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# Fix Trainer resume_from_checkpoint incorrectly calculating max_steps when changing batch size ## What does this PR do? When resuming training from a checkpoint with a different `per_device_train_batch_size` but the same global batch size\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43718",
+    "created_at": "2026-02-03T18:37:08Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43718/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43718",
+    "labels": [],
+    "merged": false,
+    "number": 43718,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix resume_from_checkpoint ignoring user's batch size configuration",
+    "updated_at": "2026-02-06T15:14:33Z"
+  },
+  {
+    "additions": 2,
+    "author": "stromfee",
+    "author_association": "NONE",
+    "body_excerpt": "AgentMarket.cloud - B2A marketplace with 28M+ real energy records. Ohne Strom keine KI. \u26a1 https://agentmarket.cloud",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43715",
+    "created_at": "2026-02-03T17:31:54Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43715/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43715",
+    "labels": [],
+    "merged": false,
+    "number": 43715,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add AgentMarket - Real Energy Data for AI Agents",
+    "updated_at": "2026-02-04T12:45:23Z"
+  },
+  {
+    "additions": 137,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do ? This PR refactor NEFTune implementation from Trainer methods to standalone functions. Part of PRs to unbloat Trainer.",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43714",
+    "created_at": "2026-02-03T16:35:29Z",
+    "deletions": 74,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43714/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43714",
+    "labels": [],
+    "merged": true,
+    "number": 43714,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[Trainer] Move NEFTune impl to standalone functions",
+    "updated_at": "2026-02-05T18:58:14Z"
+  },
+  {
+    "additions": 3,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The generate tests that compare prompt lookup or speculative decoding to the base model have an extremely high rate of flakiness, I guess because of inherent non-determinism. The actual generation works, but the test frequently sees diverg\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43713",
+    "created_at": "2026-02-03T15:51:38Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43713/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43713",
+    "labels": [],
+    "merged": false,
+    "number": 43713,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "More flaky generate tests",
+    "updated_at": "2026-02-06T17:40:10Z"
+  },
+  {
+    "additions": 56,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. See https://github.com/huggingface/transformers/issues/42832#issuecomment-3840648709 where it was pointed out",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43712",
+    "created_at": "2026-02-03T15:41:51Z",
+    "deletions": 70,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43712/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43712",
+    "labels": [],
+    "merged": true,
+    "number": 43712,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Improve typing/explanations for general model properties",
+    "updated_at": "2026-02-04T09:28:31Z"
+  },
+  {
+    "additions": 21,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR put the `lr_scheduler` initialization after the optimizer is prepared to deal with any modification that might happen after. This is mainly to deal with deepspeed as the optimizer is modified and starting wi\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43711",
+    "created_at": "2026-02-03T15:14:52Z",
+    "deletions": 17,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43711/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43711",
+    "labels": [],
+    "merged": true,
+    "number": 43711,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix scheduler initialization order",
+    "updated_at": "2026-02-04T16:08:18Z"
+  },
+  {
+    "additions": 130,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR updates the docs of [GLM-OCR](https://huggingface.co/zai-org/GLM-OCR) and fixes a link for the docs of EomT-DINOv3. Follow-up of https://github.com/huggingface/transformers/pull/43391.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43710",
+    "created_at": "2026-02-03T15:07:07Z",
+    "deletions": 19,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43710/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43710",
+    "labels": [],
+    "merged": true,
+    "number": 43710,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "[Docs] Add docs for GLM-OCR and fix EomT-DINOv3",
+    "updated_at": "2026-02-10T15:35:23Z"
+  },
+  {
+    "additions": 0,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? I had this typing failure ``` $ ty check src/transformers/utils/import_utils.py [snip] error[call-non-callable]: Object of type `VersionComparison` is not callable --> src/transformers/utils/import_utils.py:2317:16\u2026",
+    "changed_files": 0,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43709",
+    "created_at": "2026-02-03T14:26:58Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43709/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43709",
+    "labels": [],
+    "merged": true,
+    "number": 43709,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: `VersionComparison.from_string` return type mismatch",
+    "updated_at": "2026-02-23T19:05:33Z"
+  },
+  {
+    "additions": 2202,
+    "author": "liu-jiaxuan",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingfa\u2026",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43707",
+    "created_at": "2026-02-03T13:33:41Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43707/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43707",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43707,
+    "review_comments_count": 145,
+    "state": "closed",
+    "title": "[Model] Add SLANeXt Model Support",
+    "updated_at": "2026-03-20T17:24:22Z"
+  },
+  {
+    "additions": 42,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, the new way to call the attention interface has slipped through a refactor because it's too new and not too well known atm cc @yonigozlan",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43706",
+    "created_at": "2026-02-03T11:57:22Z",
+    "deletions": 48,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43706/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43706",
+    "labels": [],
+    "merged": true,
+    "number": 43706,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[`Attn`] Fixup interface usage after refactor",
+    "updated_at": "2026-02-03T14:56:35Z"
+  },
+  {
+    "additions": 120,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Allow the `is_causal` kwarg and config attribute to make well-behaved decoder-only models act as encoders",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43705",
+    "created_at": "2026-02-03T11:45:43Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43705/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43705",
+    "labels": [],
+    "merged": true,
+    "number": 43705,
+    "review_comments_count": 11,
+    "state": "closed",
+    "title": "Allow bi-directional attention for all models",
+    "updated_at": "2026-02-04T17:24:32Z"
+  },
+  {
+    "additions": 1,
+    "author": "francesco-bertolotti",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "wrong `rms_norm_type` # What does this PR do? Small type error in the configuration of qwen3. `rms_norm_eps` should be a float and not an int. ## Before submitting - [ X] This PR fixes a typo or improves the docs (you can dismiss the other\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43703",
+    "created_at": "2026-02-03T10:05:17Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43703/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43703",
+    "labels": [],
+    "merged": true,
+    "number": 43703,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update configuration_qwen3.py",
+    "updated_at": "2026-02-04T07:03:04Z"
+  },
+  {
+    "additions": 2828,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds[ UsefulSensors'](https://huggingface.co/UsefulSensors) new ASR model.",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43702",
+    "created_at": "2026-02-03T09:32:42Z",
+    "deletions": 247,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43702/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43702",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43702,
+    "review_comments_count": 30,
+    "state": "closed",
+    "title": "Add moonshine streaming",
+    "updated_at": "2026-02-12T10:10:16Z"
+  },
+  {
+    "additions": 1,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Here pytorch has a mature mechanism to auto select the right backend for different devices. @ydshieh pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43699",
+    "created_at": "2026-02-03T07:33:04Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43699/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43699",
+    "labels": [],
+    "merged": false,
+    "number": 43699,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "avoid using specified backend for tp tests",
+    "updated_at": "2026-03-09T08:17:48Z"
+  },
+  {
+    "additions": 1,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "- model loading (from pretrained, etc): @CyrilVallez - distributed: @3outeille @ArthurZucker fix tp crash. crash stack is [rank0]: Traceback (most recent call last): [rank0]: File \"/transformers/benchmark_v2/test_tp.py\", line 29, in <modul\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43695",
+    "created_at": "2026-02-03T01:30:55Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43695/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43695",
+    "labels": [],
+    "merged": true,
+    "number": 43695,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix gptoss tp crash",
+    "updated_at": "2026-02-03T10:20:30Z"
+  },
+  {
+    "additions": 1,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "updates link to benchmark's new location",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43694",
+    "created_at": "2026-02-03T01:21:15Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43694/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43694",
+    "labels": [],
+    "merged": true,
+    "number": 43694,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[docs] benchmarks",
+    "updated_at": "2026-02-03T17:00:13Z"
+  },
+  {
+    "additions": 1,
+    "author": "WilliamRoyNelson",
+    "author_association": "NONE",
+    "body_excerpt": "# Update doc preprocessing regex to prevent ReDoS The regular expression for capturing docstrings is vulnerable to a [ReDoS attack](https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS) The previous change d\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43693",
+    "created_at": "2026-02-03T01:06:06Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43693/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43693",
+    "labels": [],
+    "merged": false,
+    "number": 43693,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update doc preprocessing regex to prevent ReDoS",
+    "updated_at": "2026-02-03T17:23:59Z"
+  },
+  {
+    "additions": 13,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary On PyTorch 2.10+, `Trainer.train()` crashes at the first `lr_scheduler.step()` when using DeepSpeed ZeRO-3 with a PEFT model. This PR provides fix, alothough I'm sure it's not the ideal one. The failure only appears with torch 2\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43692",
+    "created_at": "2026-02-02T21:42:11Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43692/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43692",
+    "labels": [],
+    "merged": false,
+    "number": 43692,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix DeepSpeed ZeRO-3 + PEFT + Trainer scheduler mismatch on PyTorch 2.10+",
+    "updated_at": "2026-02-03T15:40:19Z"
+  },
+  {
+    "additions": 109,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. All those important static properties (`_keep_in_fp32_modules`, `_no_split_modules`, etc) need to be determined automatically in the case of composite models. This follows the same approach I previ\u2026",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43691",
+    "created_at": "2026-02-02T18:17:44Z",
+    "deletions": 121,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43691/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43691",
+    "labels": [],
+    "merged": true,
+    "number": 43691,
+    "review_comments_count": 10,
+    "state": "closed",
+    "title": "Composite model inherit automatically all important properties from their children",
+    "updated_at": "2026-02-03T16:57:41Z"
+  },
+  {
+    "additions": 10,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/43645. It's quite unusual, but no reason it should crash in general",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43690",
+    "created_at": "2026-02-02T17:14:50Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43690/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43690",
+    "labels": [],
+    "merged": true,
+    "number": 43690,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix crash of custom models in Notebook or Repl",
+    "updated_at": "2026-02-03T13:20:20Z"
+  },
+  {
+    "additions": 1,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "`additional_special_tokens_ids`: Removed. Use `extra_special_tokens_ids` instead",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43689",
+    "created_at": "2026-02-02T16:18:36Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43689/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43689",
+    "labels": [],
+    "merged": true,
+    "number": 43689,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "update guide with new attr name for toks",
+    "updated_at": "2026-02-02T21:04:22Z"
+  },
+  {
+    "additions": 164,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - adds Bandit's S110 that makes sure we don't have a dry `Except` - fixes all occurrences - mark a couple of spots where we could tighten the `Exception` catch all I focused on making changes under `src/transformers\u2026",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43687",
+    "created_at": "2026-02-02T15:29:48Z",
+    "deletions": 150,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43687/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43687",
+    "labels": [],
+    "merged": true,
+    "number": 43687,
+    "review_comments_count": 37,
+    "state": "closed",
+    "title": "Added S110 - try-except-pass rule",
+    "updated_at": "2026-02-03T21:20:36Z"
+  },
+  {
+    "additions": 1,
+    "author": "jianchang512",
+    "author_association": "NONE",
+    "body_excerpt": "Tokenization should be performed on the source language, i.e., `fi_text`. # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43686",
+    "created_at": "2026-02-02T15:14:36Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43686/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43686",
+    "labels": [],
+    "merged": false,
+    "number": 43686,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix variable referencing error during word segmentation",
+    "updated_at": "2026-02-02T15:16:51Z"
+  },
+  {
+    "additions": 2645,
+    "author": "cmakinet",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? Adds a comprehensive Hugging Face Transformers Skill following the Agent Skills Open Standard as requested in issue #42971. Fixes # (issue) 1. Created a complete Hugging Face Transformers Skill package in `skills/hu\u2026",
+    "changed_files": 14,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43685",
+    "created_at": "2026-02-02T14:48:35Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43685/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43685",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43685,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "feat: Add Hugging Face Transformers Skill (closes #42971)",
+    "updated_at": "2026-02-03T14:18:13Z"
+  },
+  {
+    "additions": 112,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. https://github.com/huggingface/transformers/pull/41580 broke the `keep_in_fp32_modules` flag as it's supposed to be used only with fp16, not bf16. I added very strict tests on this to avoid name cl\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43683",
+    "created_at": "2026-02-02T14:14:36Z",
+    "deletions": 26,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43683/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43683",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43683,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "[loading] Fix forced upcasting to fp32",
+    "updated_at": "2026-02-02T15:41:47Z"
+  },
+  {
+    "additions": 34,
+    "author": "githubnemo",
+    "author_association": "MEMBER",
+    "body_excerpt": "PR #41541 refactored `tie_word_embeddings` handling (among other things) which subtly broke detection of T5 v1.1 vs. original detection. As a consequence, decoder output scaling was always applied, regardless of T5 version. This is resolve\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43681",
+    "created_at": "2026-02-02T13:12:00Z",
+    "deletions": 34,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43681/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43681",
+    "labels": [],
+    "merged": true,
+    "number": 43681,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix T5 v1.1 detection",
+    "updated_at": "2026-02-05T11:02:03Z"
+  },
+  {
+    "additions": 151,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? I also want to see if linear cache thing can be squeezed in this PR. If it requires big diffs, I'll split into two Fixes https://github.com/huggingface/transformers/issues/43673 Sidenote: kinda breaking but in a goo\u2026",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43679",
+    "created_at": "2026-02-02T11:29:06Z",
+    "deletions": 112,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43679/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43679",
+    "labels": [],
+    "merged": true,
+    "number": 43679,
+    "review_comments_count": 22,
+    "state": "closed",
+    "title": "\ud83d\udea8 Generation cache preparation",
+    "updated_at": "2026-02-04T13:22:50Z"
+  },
+  {
+    "additions": 10,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "In this PR, we do several things for llava_onevision model: 1. skip torch_exportable tests as it does not support it 2. unify expected output for cuda and xpu 3. add `image_sizes` param in `flash_attn_inference_equivalence` func to support\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43678",
+    "created_at": "2026-02-02T10:31:21Z",
+    "deletions": 11,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43678/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43678",
+    "labels": [],
+    "merged": true,
+    "number": 43678,
+    "review_comments_count": 10,
+    "state": "closed",
+    "title": "Llava onevision: output align for tests and add `image_sizes` input param",
+    "updated_at": "2026-04-09T02:33:05Z"
+  },
+  {
+    "additions": 7,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43677",
+    "created_at": "2026-02-02T10:31:05Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43677/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43677",
+    "labels": [],
+    "merged": false,
+    "number": 43677,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "one possible fix issue #43676",
+    "updated_at": "2026-02-20T08:43:59Z"
+  },
+  {
+    "additions": 20,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? in `PreTrainedTokenizerBase.from_pretrained` this commit https://github.com/huggingface/transformers/commit/73a13f86f6d208882d59d1200609986c5a5f49a7#diff-85b29486a884f445b1014[\u2026]f4ae701ee758a754fddcc1L1679 silenced\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43675",
+    "created_at": "2026-02-02T09:52:19Z",
+    "deletions": 24,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43675/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43675",
+    "labels": [
+      "bug"
+    ],
+    "merged": true,
+    "number": 43675,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "make sure hub errors are surfaced",
+    "updated_at": "2026-02-02T12:41:11Z"
+  },
+  {
+    "additions": 30,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43262 Supersedes https://github.com/huggingface/transformers/pull/43273 and https://github.com/huggingface/transformers/pull/43660 I don't think the prev PR w\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43674",
+    "created_at": "2026-02-02T09:32:00Z",
+    "deletions": 42,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43674/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43674",
+    "labels": [],
+    "merged": true,
+    "number": 43674,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Use correct sampling rate in chat template",
+    "updated_at": "2026-02-02T11:32:37Z"
+  },
+  {
+    "additions": 383,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? According to PR https://github.com/huggingface/transformers/pull/43030, update test files `tests/models/pe_audio/test_modeling_pe_audio.py` and `tests/models/pe_video/test_modeling_pe_video.py`. The changes to `src/\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43672",
+    "created_at": "2026-02-02T04:10:06Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43672/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43672",
+    "labels": [],
+    "merged": false,
+    "number": 43672,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix pe uts",
+    "updated_at": "2026-02-05T11:03:54Z"
+  },
+  {
+    "additions": 1,
+    "author": "yiliu30",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Signed-off-by: yiliu30 <yi4.liu@intel.com> - Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 ```bash input = { \"messages\": [ { \"role\": \"user\", \"content\": [ { \"type\": \"text\", \"text\": \"The history of France is \", } ], }, ], } I have a question about th\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43670",
+    "created_at": "2026-02-02T02:06:14Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43670/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43670",
+    "labels": [],
+    "merged": true,
+    "number": 43670,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix FP8Expert for Qwen",
+    "updated_at": "2026-02-02T15:18:49Z"
+  },
+  {
+    "additions": 2,
+    "author": "fschlatt",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43669",
+    "created_at": "2026-02-01T09:47:44Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43669/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43669",
+    "labels": [],
+    "merged": true,
+    "number": 43669,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix norm_eps dtype",
+    "updated_at": "2026-02-03T14:34:00Z"
+  },
+  {
+    "additions": 2,
+    "author": "CodeByKodi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "- index.md: \"multimodal model\" \u2192 \"multimodal models\" - quicktour.md: \"Pytorch\" \u2192 \"PyTorch\" # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43666",
+    "created_at": "2026-02-01T03:24:56Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43666/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43666",
+    "labels": [],
+    "merged": true,
+    "number": 43666,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Docs: fix typos in Get started (index, quicktour)",
+    "updated_at": "2026-02-02T17:47:53Z"
+  },
+  {
+    "additions": 9,
+    "author": "ydshieh2",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43665",
+    "created_at": "2026-01-31T21:50:43Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43665/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43665",
+    "labels": [],
+    "merged": false,
+    "number": 43665,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix",
+    "updated_at": "2026-03-24T14:42:35Z"
+  },
+  {
+    "additions": 1256,
+    "author": "jonathan-fulton",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR introduces a declarative pipeline composition system that enables chaining multiple HuggingFace pipelines into unified workflows. It provides a clean, Pythonic API for building multi-model applications with\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43664",
+    "created_at": "2026-01-31T21:35:02Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43664/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43664",
+    "labels": [],
+    "merged": false,
+    "number": 43664,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add pipeline composition DSL for multi-model workflows",
+    "updated_at": "2026-02-03T13:45:11Z"
+  },
+  {
+    "additions": 13,
+    "author": "abigailtech",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Adds _get_signature_columns method that custom trainers can override to specify which columns to preserve when remove_unused_columns=True. Currently, TRL trainers (DPO, GRPO) must override _set_signature_columns_if_needed entirely just to\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43663",
+    "created_at": "2026-01-31T21:34:03Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43663/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43663",
+    "labels": [],
+    "merged": false,
+    "number": 43663,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add _get_signature_columns method to allow custom trainers to override column filtering",
+    "updated_at": "2026-02-03T13:32:17Z"
+  },
+  {
+    "additions": 7,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The file `utils/process_bad_commit_report.py` tried to get a team member to ping on slack, including someone merged a PR despite they are not the author of that PR. However, that part was written before we have PR c\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43662",
+    "created_at": "2026-01-31T21:26:22Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43662/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43662",
+    "labels": [],
+    "merged": true,
+    "number": 43662,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix `process_bad_commit_report.py`: avoid items to appear in `null` author in the report",
+    "updated_at": "2026-01-31T21:46:01Z"
+  },
+  {
+    "additions": 2999,
+    "author": "jonathan-fulton",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## \ud83d\ude80 Feature: Unified Model Introspection & Diagnostics Framework ### What does this PR do? This PR introduces a comprehensive diagnostics framework for profiling and analyzing transformer models during inference. It provides a unified API\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43661",
+    "created_at": "2026-01-31T20:17:33Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43661/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43661",
+    "labels": [],
+    "merged": false,
+    "number": 43661,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "feat: Add unified model introspection & diagnostics framework",
+    "updated_at": "2026-02-03T13:45:28Z"
+  },
+  {
+    "additions": 71,
+    "author": "jonathan-fulton",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #43262 ### Problem The `apply_chat_template()` method always defaults to 16kHz sampling rate, even when the processor's feature extractor specifies a different rate: ```python processor = AutoProcessor.from_p\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43660",
+    "created_at": "2026-01-31T18:58:29Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43660/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43660",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43660,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix audio processors defaulting to 16kHz in apply_chat_template",
+    "updated_at": "2026-02-02T12:17:40Z"
+  },
+  {
+    "additions": 2,
+    "author": "jonathan-fulton",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #43519 ### Problem The Qwen3VL processor was incorrectly using `merge_size` (spatial merging) instead of `temporal_patch_size` (temporal grouping) when calculating video frame timestamps. ```python # Before (\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43659",
+    "created_at": "2026-01-31T18:57:21Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43659/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43659",
+    "labels": [],
+    "merged": true,
+    "number": 43659,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix incorrect timestamp calculation in Qwen3VL Processor",
+    "updated_at": "2026-02-09T09:54:09Z"
+  },
+  {
+    "additions": 3,
+    "author": "jonathan-fulton",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #43335 ### Problem When configuring a SwitchTransformers model with `num_sparse_encoder_layers=0` (intending to have zero sparse layers), a sparse layer is still incorrectly created when `num_layers=1`: ```py\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43658",
+    "created_at": "2026-01-31T18:56:26Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43658/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43658",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43658,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix SwitchTransformers creating sparse layer when num_sparse_*_layers=0",
+    "updated_at": "2026-02-03T15:31:56Z"
+  },
+  {
+    "additions": 6,
+    "author": "jonathan-fulton",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #43618 ### Problem Since v5, `CLIPVisionModel` and `CLIPTextModel` no longer return attention weights when `output_attentions=True`: ```python model = CLIPModel.from_pretrained(model_path, attn_implementation\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43657",
+    "created_at": "2026-01-31T18:55:33Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43657/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43657",
+    "labels": [],
+    "merged": true,
+    "number": 43657,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix CLIPOutput attentions not being returned",
+    "updated_at": "2026-02-03T13:51:22Z"
+  },
+  {
+    "additions": 9,
+    "author": "jonathan-fulton",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #43576 The `transformers env` command was failing with: ``` NameError: name 'TypeAdapter' is not defined ``` ### Root Cause The `Serve` class in `serve.py` uses `TypeAdapter` (from pydantic) as a type annotat\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43656-4",
+    "cluster_ids": [
+      "cluster-43656-4"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43656",
+    "created_at": "2026-01-31T18:54:19Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43656/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43656",
+    "labels": [],
+    "merged": false,
+    "number": 43656,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Fix TypeAdapter NameError in transformers CLI",
+    "updated_at": "2026-03-01T12:00:57Z"
+  },
+  {
+    "additions": 1,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The PR #43628 changed a bit the format of `new_failures.json` and causes the `check_bad_commit.py` may fail. (\"single-gpu\" is no longer guaranteed to be in the dict)",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43655",
+    "created_at": "2026-01-31T18:47:01Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43655/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43655",
+    "labels": [],
+    "merged": true,
+    "number": 43655,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix `KeyError` in `check_bad_commit.py`",
+    "updated_at": "2026-01-31T18:50:59Z"
+  },
+  {
+    "additions": 27,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? \u2192 Fixes `test_modeling_big_bird.py::BigBirdModelIntegrationTest::test_fill_mask`. For more details on reproducing the bug, please visit the linked issue! Fixes #43653. ### Before submitting - [ ] This PR fixes a t\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43654",
+    "created_at": "2026-01-31T18:31:06Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43654/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43654",
+    "labels": [],
+    "merged": false,
+    "number": 43654,
+    "review_comments_count": 7,
+    "state": "open",
+    "title": "fix(tokenizer): Avert special token property overwrites in batch add_tokens calls",
+    "updated_at": "2026-04-18T08:36:46Z"
+  },
+  {
+    "additions": 1,
+    "author": "jonathan-fulton",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes the model/processor mismatch in the SigLIP2 documentation quantization example. Fixes #39692 ## The Problem The quantization example used mismatched model and processor checkpoints: - Model: `google/siglip2-l\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43652",
+    "created_at": "2026-01-31T18:17:25Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43652/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43652",
+    "labels": [],
+    "merged": true,
+    "number": 43652,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix model/processor mismatch in SigLIP2 quantization example",
+    "updated_at": "2026-02-03T13:38:04Z"
+  },
+  {
+    "additions": 13,
+    "author": "abigailtech",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Added a _loss_is_scaled_for_ga property that custom trainers can override to explicitly control gradient accumulation loss scaling. The default implementation preserves backward compatibility. Custom trainers can now simply override this p\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43651",
+    "created_at": "2026-01-31T17:51:42Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43651/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43651",
+    "labels": [],
+    "merged": false,
+    "number": 43651,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add _loss_is_scaled_for_ga to allow custom trainers to control gradient accumulation loss scaling",
+    "updated_at": "2026-03-13T22:28:27Z"
+  },
+  {
+    "additions": 158,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 85,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43649",
+    "created_at": "2026-01-31T14:06:38Z",
+    "deletions": 143,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43649/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43649",
+    "labels": [],
+    "merged": false,
+    "number": 43649,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Check new failures reporting 5",
+    "updated_at": "2026-02-16T07:56:27Z"
+  },
+  {
+    "additions": 26,
+    "author": "ydshieh2",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43648",
+    "created_at": "2026-01-31T12:23:05Z",
+    "deletions": 22,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43648/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43648",
+    "labels": [],
+    "merged": false,
+    "number": 43648,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "check",
+    "updated_at": "2026-01-31T21:51:06Z"
+  },
+  {
+    "additions": 22,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43647",
+    "created_at": "2026-01-31T11:38:27Z",
+    "deletions": 100,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43647/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43647",
+    "labels": [],
+    "merged": false,
+    "number": 43647,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "check",
+    "updated_at": "2026-02-13T02:07:05Z"
+  },
+  {
+    "additions": 1,
+    "author": "prachigurav1203",
+    "author_association": "NONE",
+    "body_excerpt": "change # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43642",
+    "created_at": "2026-01-31T07:02:11Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43642/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43642",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43642,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Improve benchmark_config documentation",
+    "updated_at": "2026-02-03T13:26:46Z"
+  },
+  {
+    "additions": 1,
+    "author": "KOKOSde",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Docs fix for SigLIP2 usage examples. - Use the correct `torch_dtype=` argument in examples (instead of `dtype=`). - Fix the processor model id in the 4-bit example to match the model being loaded. Docs-only change.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43641",
+    "created_at": "2026-01-31T07:00:56Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43641/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43641",
+    "labels": [],
+    "merged": true,
+    "number": 43641,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Docs: fix SigLIP2 usage examples",
+    "updated_at": "2026-03-06T20:02:56Z"
+  },
+  {
+    "additions": 45,
+    "author": "KOKOSde",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Detect adapter repositories (via `adapter_config.json`) and raise a clear, actionable error when `peft` is not installed, instead of the generic \"missing pytorch_model.bin/model.safetensors\" message. ## Changes - Check for `adap\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43640",
+    "created_at": "2026-01-31T07:00:53Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43640/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43640",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43640,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Improve error message for PEFT adapter without peft",
+    "updated_at": "2026-02-03T13:26:19Z"
+  },
+  {
+    "additions": 5,
+    "author": "Justynita",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? The Quickstart \"Open in Colab\" example can fail in a fresh Google Colab environment due to an incompatible preinstalled version of `huggingface_hub`. This PR updates the installation command to explicitly constrain\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43639",
+    "created_at": "2026-01-31T04:47:25Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43639/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43639",
+    "labels": [],
+    "merged": false,
+    "number": 43639,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix Quickstart Colab dependency mismatch",
+    "updated_at": "2026-02-03T13:25:12Z"
+  },
+  {
+    "additions": 37,
+    "author": "22navyakumar",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43637",
+    "created_at": "2026-01-31T00:12:02Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43637/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43637",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43637,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Exercise 1: MX Linear for LlamaAttention",
+    "updated_at": "2026-02-03T12:34:49Z"
+  },
+  {
+    "additions": 11,
+    "author": "abigailtech",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Adds a private _metrics dict to the Trainer class that allows custom trainers to log metrics without overriding log. Custom trainers can now simply do: `self._metrics[mode][\"my_metric\"].append(value)` And the metrics will be automatically\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43636",
+    "created_at": "2026-01-30T21:53:43Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43636/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43636",
+    "labels": [],
+    "merged": false,
+    "number": 43636,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Add _metrics dict to Trainer for custom metric logging",
+    "updated_at": "2026-03-13T22:22:03Z"
+  },
+  {
+    "additions": 48,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add explicit commit info to PR comment CI feedback + improve info. contained in the new failures report",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 21,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43635",
+    "created_at": "2026-01-30T20:39:22Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43635/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43635",
+    "labels": [],
+    "merged": true,
+    "number": 43635,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add explicit commit info to PR comment CI feedback",
+    "updated_at": "2026-01-31T11:34:48Z"
+  },
+  {
+    "additions": 10,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "There are some flaky test failures that are very hard to track down. I suspect the modification of core mappings and `importlib.reload()` in `test_voxtral_tokenizer_converts_from_tekken` has some kind of weird race condition or state mutat\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43634",
+    "created_at": "2026-01-30T16:14:31Z",
+    "deletions": 24,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43634/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43634",
+    "labels": [],
+    "merged": true,
+    "number": 43634,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Don't modify mappings in tests",
+    "updated_at": "2026-01-30T16:48:06Z"
+  },
+  {
+    "additions": 324,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Makes sure that the attn implementation is set to all sub-configs. The `config.encoder.text_config` was not getting its attn set because we aren't passing it to `PreTrainedModel.__init__`. We can't change the model\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43633",
+    "created_at": "2026-01-30T15:50:24Z",
+    "deletions": 258,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43633/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43633",
+    "labels": [],
+    "merged": true,
+    "number": 43633,
+    "review_comments_count": 15,
+    "state": "closed",
+    "title": "\ud83d\udea8 T5Gemma2 model structure",
+    "updated_at": "2026-02-04T14:44:55Z"
+  },
+  {
+    "additions": 3,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Disabling the validation of args torch distributions due to a) dummy values not always being sound b) seemingly torch internals failing checks (?); unsure not used to parsing torch export reports Following errors are in export otherwise: -\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43631",
+    "created_at": "2026-01-30T14:49:56Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43631/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43631",
+    "labels": [],
+    "merged": false,
+    "number": 43631,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "[`Export`] Disable argument check in torch distributions for export",
+    "updated_at": "2026-03-14T19:59:23Z"
+  },
+  {
+    "additions": 94,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Provide more information like - the link to the hub repository containing the whole set of failed tests caused by the PR Also clean up some internal logic ## CI Results [Workflow Run \u2699\ufe0f](https://github.com/huggingfa\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 20,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43629",
+    "created_at": "2026-01-30T14:38:34Z",
+    "deletions": 44,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43629/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43629",
+    "labels": [],
+    "merged": true,
+    "number": 43629,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Better new failures reporting for PR comment CI",
+    "updated_at": "2026-01-30T20:43:14Z"
+  },
+  {
+    "additions": 103,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Clean up the `new_failures` reports, also adding the \"trace\". It looks like https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/blob/main/2026-01-30/runs/1596-21518380210/ci_results_run_models_\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43628",
+    "created_at": "2026-01-30T14:30:47Z",
+    "deletions": 111,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43628/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43628",
+    "labels": [],
+    "merged": true,
+    "number": 43628,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Improve new failures reporting",
+    "updated_at": "2026-01-30T14:40:01Z"
+  },
+  {
+    "additions": 495,
+    "author": "Suraj1199",
+    "author_association": "NONE",
+    "body_excerpt": null,
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43627",
+    "created_at": "2026-01-30T14:22:03Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43627/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43627",
+    "labels": [],
+    "merged": false,
+    "number": 43627,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add New Scratch Implementation for Transformers",
+    "updated_at": "2026-01-30T14:23:15Z"
+  },
+  {
+    "additions": 40,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "The test `tests/generation/test_paged_attention.py::TestBatchGeneration::test_generate_batch_consistency` is failing because CB shuffles incoming request to maximize prefix caching. This is not a problem unless the user expects outputs to\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43626",
+    "created_at": "2026-01-30T14:12:40Z",
+    "deletions": 33,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43626/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43626",
+    "labels": [],
+    "merged": true,
+    "number": 43626,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[CB] Keep order of incoming requests",
+    "updated_at": "2026-02-03T09:59:47Z"
+  },
+  {
+    "additions": 3079,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds VibeVoice ASR, branches off of (and hence depends on) https://github.com/huggingface/transformers/pull/43400 Original checkpoint: https://huggingface.co/microsoft/VibeVoice-ASR Original modeling code: https://g\u2026",
+    "changed_files": 30,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43625",
+    "created_at": "2026-01-30T14:06:26Z",
+    "deletions": 311,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43625/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43625",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": true,
+    "number": 43625,
+    "review_comments_count": 65,
+    "state": "closed",
+    "title": "Add VibeVoice ASR",
+    "updated_at": "2026-03-02T11:29:55Z"
+  },
+  {
+    "additions": 1987,
+    "author": "jeeth-kataria",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Adds a Claude SKILL for PEFT (Parameter-Efficient Fine-Tuning) and LoRA guidance, complementing the transformers-api skill in #43340. ## Why is this needed? PEFT/LoRA is one of the most popular integrations with Tr\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43624",
+    "created_at": "2026-01-30T13:59:35Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43624/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43624",
+    "labels": [],
+    "merged": false,
+    "number": 43624,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add Claude Skill for PEFT/LoRA fine-tuning guidance",
+    "updated_at": "2026-02-03T13:08:11Z"
+  },
+  {
+    "additions": 77,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? as per title",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43623",
+    "created_at": "2026-01-30T13:21:16Z",
+    "deletions": 47,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43623/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43623",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43623,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Tie zamba weights correctly",
+    "updated_at": "2026-02-02T09:17:36Z"
+  },
+  {
+    "additions": 1149,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Also fix a vllm regresion on experts. Allow Experts and Attention module to be used on their own, such as ```py import torch from transformers import AutoConfig from transformers.models.mixtral.mod\u2026",
+    "changed_files": 295,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43622",
+    "created_at": "2026-01-30T12:17:22Z",
+    "deletions": 1102,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43622/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43622",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43622,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Allow Attention and Experts to be used as standalone modules",
+    "updated_at": "2026-01-30T16:35:32Z"
+  },
+  {
+    "additions": 13,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Following specs described by @hmellor for vllm compatibility, no logic change should be required. Only the following small changes to mappings should be enough",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43621",
+    "created_at": "2026-01-30T11:41:44Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43621/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43621",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43621,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[vllm compat] Separate renaming from conversion ops",
+    "updated_at": "2026-01-30T17:12:06Z"
+  },
+  {
+    "additions": 281,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, this is mainly due to the fact that we have little control over remote code which could lead to solutions like - Closes #43609 --> makes the whole mixin behave like a static holder for methods... - Modify methods/inherited cl\u2026",
+    "changed_files": 137,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43620",
+    "created_at": "2026-01-30T11:24:09Z",
+    "deletions": 288,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43620/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43620",
+    "labels": [],
+    "merged": true,
+    "number": 43620,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`Rope`] Revert #43410 and make inheritance implicit again",
+    "updated_at": "2026-01-30T18:44:16Z"
+  },
+  {
+    "additions": 40,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, some models add or delete entries in tied weights depending on configuration. If we load two models consecutively with different configs, it fails to tie weights correctly I am copying it in `__init__`\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43619",
+    "created_at": "2026-01-30T10:43:38Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43619/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43619",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43619,
+    "review_comments_count": 8,
+    "state": "closed",
+    "title": "Don't modify `tied_weight_keys` in-place",
+    "updated_at": "2026-01-30T15:46:02Z"
+  },
+  {
+    "additions": 17,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@zucchini-nlp pls help review, thx! We have to add back the changes in https://github.com/huggingface/transformers/pull/42523. As for llava_onevision model, in its checkpoint config file, the model's `tie_word_embeddings` is Flase, and mod\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43617",
+    "created_at": "2026-01-30T10:21:45Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43617/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43617",
+    "labels": [],
+    "merged": false,
+    "number": 43617,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix tie_word_embedding issue for llava_onevision model",
+    "updated_at": "2026-04-13T02:41:01Z"
+  },
+  {
+    "additions": 3,
+    "author": "yiliu30",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Signed-off-by: yiliu30 <yi4.liu@intel.com> # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so mak\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43616",
+    "created_at": "2026-01-30T08:45:18Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43616/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43616",
+    "labels": [],
+    "merged": true,
+    "number": 43616,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix `FP8Expert` for DeepSeek R1",
+    "updated_at": "2026-02-02T11:55:53Z"
+  },
+  {
+    "additions": 2,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@SunMarc this PR fixes failed case of `tests/quantization/bnb/test_mixed_int8.py::MixedInt8GPT2Test::test_generate_quality_config` for XPU, pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43615",
+    "created_at": "2026-01-30T05:17:49Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43615/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43615",
+    "labels": [],
+    "merged": true,
+    "number": 43615,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add XPU expected output for MixedInt8GPT2Test",
+    "updated_at": "2026-04-13T02:40:20Z"
+  },
+  {
+    "additions": 2,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "We need to add contiguous operation in sdpa stage for xpu as well for this model so as we can use compile mode. pls help review, thx! @IlyasMoutawwakil",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43614",
+    "created_at": "2026-01-30T03:45:47Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43614/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43614",
+    "labels": [],
+    "merged": true,
+    "number": 43614,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add contiguous operation for diffllama model for xpu to enable compile mode.",
+    "updated_at": "2026-04-13T02:40:19Z"
+  },
+  {
+    "additions": 1347,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "Add pipeline for sam/sam2/edgetam/sam3_tracker task",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43613",
+    "created_at": "2026-01-30T03:00:57Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43613/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43613",
+    "labels": [],
+    "merged": false,
+    "number": 43613,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "Add Promptable Visual Segmentation pipeline",
+    "updated_at": "2026-02-03T16:17:10Z"
+  },
+  {
+    "additions": 1143,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "Add pipeline for SAM3's PCS task",
+    "changed_files": 13,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43612",
+    "created_at": "2026-01-30T02:53:37Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43612/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43612",
+    "labels": [],
+    "merged": false,
+    "number": 43612,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "Add Promptable Concept Segmentation pipeline",
+    "updated_at": "2026-02-03T14:49:14Z"
+  },
+  {
+    "additions": 1,
+    "author": "KOKOSde",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fix a spelling typo in the dynamic weight loading / weight converter documentation. **Change**: `formated` \u2192 `formatted` in `docs/source/en/weightconverter.md` ## Test plan - N/A (documentation-only change) Made with [Cursor](ht\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43610",
+    "created_at": "2026-01-30T02:06:10Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43610/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43610",
+    "labels": [],
+    "merged": true,
+    "number": 43610,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Docs: fix typo in weight converter guide",
+    "updated_at": "2026-02-03T15:38:52Z"
+  },
+  {
+    "additions": 242,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, draft to show what would be roughly needed",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43609",
+    "created_at": "2026-01-29T21:15:10Z",
+    "deletions": 212,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43609/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43609",
+    "labels": [],
+    "merged": false,
+    "number": 43609,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[`Rope`] Re-Enable remote code",
+    "updated_at": "2026-01-30T17:09:34Z"
+  },
+  {
+    "additions": 1,
+    "author": "tobyliu2004",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "\u2026tibility # What does this PR do? Fixes #43582 Fixes TypeError on Apple Silicon (MPS) when loading models with quantization by ensuring `byte_count // 2` returns a Python int. ## The Issue On line 4762 in `modeling_utils.py`, `torch.empty(\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43608",
+    "created_at": "2026-01-29T18:37:54Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43608/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43608",
+    "labels": [],
+    "merged": true,
+    "number": 43608,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Cast byte_count to int in caching_allocator_warmup for MPS compatibility",
+    "updated_at": "2026-02-02T16:55:51Z"
+  },
+  {
+    "additions": 15,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? \u2192 `BarkCausalModel` and `BarkFineModel` incorrectly inferred the device, causing device mismatches when `enable_cpu_offload()` is used. For more details on reproducing the bug, please visit the linked issue! Fixes\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43607",
+    "created_at": "2026-01-29T18:31:20Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43607/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43607",
+    "labels": [],
+    "merged": true,
+    "number": 43607,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(models): Fix suno/bark-small CPU offload device mismatch causing CI failures",
+    "updated_at": "2026-04-18T08:37:39Z"
+  },
+  {
+    "additions": 4,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43605",
+    "created_at": "2026-01-29T16:22:35Z",
+    "deletions": 22,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43605/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43605",
+    "labels": [],
+    "merged": false,
+    "number": 43605,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "undo processing test",
+    "updated_at": "2026-02-02T10:55:50Z"
+  },
+  {
+    "additions": 53,
+    "author": "vaibhav-research",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? `TokenizersBackend._patch_mistral_regex()` is a Mistral-specific tokenizer patch, but the current implementation may call `huggingface_hub.model_info()` during detection. That triggers an HTTP request to `/api/model\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43603",
+    "created_at": "2026-01-29T15:30:56Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43603/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43603",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43603,
+    "review_comments_count": 8,
+    "state": "closed",
+    "title": "Fix unintended Hub metadata calls from _patch_mistral_regex",
+    "updated_at": "2026-04-13T10:00:54Z"
+  },
+  {
+    "additions": 22,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "This PR fixes two issues related to CB and serve: - when the `max_length` argument as set to `None` when creating a request, CB had an error because it always expected max_length to be an int. This is now an expected behavior, and we have\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43594",
+    "created_at": "2026-01-29T13:06:19Z",
+    "deletions": 13,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43594/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43594",
+    "labels": [],
+    "merged": true,
+    "number": 43594,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[CB] [Serve] Fix broken serve tests",
+    "updated_at": "2026-01-30T13:10:52Z"
+  },
+  {
+    "additions": 32,
+    "author": "Vallabh-1504",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR fixes a crash when initializing `Qwen3OmniMoeTalkerCodePredictorConfig` due to a missing attribute reference. Specifically, it: 1. **Removes** the reference to the non-existent `use_sliding_window` attribute\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43593",
+    "created_at": "2026-01-29T13:01:01Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43593/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43593",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43593,
+    "review_comments_count": 10,
+    "state": "closed",
+    "title": "fix: AttributeError for Qwen3_omni_moe",
+    "updated_at": "2026-02-04T10:44:30Z"
+  },
+  {
+    "additions": 223,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43525 Fixes https://github.com/huggingface/transformers/issues/43572 Adds missing `pad_token_id` and `tie_word_embeddings` to config classes with their defaul\u2026",
+    "changed_files": 38,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43592",
+    "created_at": "2026-01-29T12:42:02Z",
+    "deletions": 21,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43592/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43592",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43592,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fixes configuration default values",
+    "updated_at": "2026-01-30T11:37:26Z"
+  },
+  {
+    "additions": 15,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "custom tokenizers fail on super._init_() call that tries to update vocab size before all vocab attrs are defined",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43591",
+    "created_at": "2026-01-29T12:25:21Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43591/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43591",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43591,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "custom tok init fix",
+    "updated_at": "2026-02-02T17:00:18Z"
+  },
+  {
+    "additions": 5117,
+    "author": "molbap",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? In model additions, we often see old standards not using `check_model_inputs`, `can_return_tuple` and it's often a first review comment/something that can slip through. Doing a wide scan to try to remove all occurre\u2026",
+    "changed_files": 234,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 53,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43590",
+    "created_at": "2026-01-29T12:23:56Z",
+    "deletions": 12955,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43590/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43590",
+    "labels": [],
+    "merged": true,
+    "number": 43590,
+    "review_comments_count": 135,
+    "state": "closed",
+    "title": "Remove many output_attentions and other traced outputs on 100+ models ",
+    "updated_at": "2026-03-12T19:08:41Z"
+  },
+  {
+    "additions": 256,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. This simplifies the use of `LoadStateDictInfo` a bit everywhere, and makes it clear that the entries within the struct are mutated in-place all the time.",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43589",
+    "created_at": "2026-01-29T12:18:14Z",
+    "deletions": 271,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43589/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43589",
+    "labels": [],
+    "merged": true,
+    "number": 43589,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Simplify loading structure",
+    "updated_at": "2026-02-02T13:28:43Z"
+  },
+  {
+    "additions": 44,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43540",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43588",
+    "created_at": "2026-01-29T11:35:25Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43588/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43588",
+    "labels": [],
+    "merged": true,
+    "number": 43588,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Qwen3 omni - fix get video features",
+    "updated_at": "2026-01-29T14:55:30Z"
+  },
+  {
+    "additions": 336,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Check docstring currently doesn't check modular files. So if a class is defined in modular without any \"copying\" and has a bad docstring, we do not check it. Instead we check the auto-generated code, which sometimes\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43587",
+    "created_at": "2026-01-29T11:18:12Z",
+    "deletions": 48,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43587/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43587",
+    "labels": [],
+    "merged": false,
+    "number": 43587,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[WIP] Check docstring runs on modular files",
+    "updated_at": "2026-01-29T11:27:23Z"
+  },
+  {
+    "additions": 8,
+    "author": "Wauplin",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR sets `HF_HUB_DOWNLOAD_TIMEOUT=60` as environment variable when running tests. I've also added [`pytest-env`](https://github.com/pytest-dev/pytest-env) as a dev dependency to do that. I've also reverted https\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43586",
+    "created_at": "2026-01-29T10:46:31Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43586/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43586",
+    "labels": [],
+    "merged": true,
+    "number": 43586,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Increase default ReadTimeout in tests",
+    "updated_at": "2026-01-29T12:10:13Z"
+  },
+  {
+    "additions": 76,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The change introduced in https://github.com/huggingface/transformers/pull/43261 with `__setattr__` is quite dangerous, as `source_patterns` and `target_patterns` live together and cannot be unentangled from one anot\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43585",
+    "created_at": "2026-01-29T10:22:53Z",
+    "deletions": 92,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43585/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43585",
+    "labels": [],
+    "merged": true,
+    "number": 43585,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Regex post processing in loading",
+    "updated_at": "2026-01-29T14:10:28Z"
+  },
+  {
+    "additions": 25,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Just reuse the local files as much as possible",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43584",
+    "created_at": "2026-01-29T09:38:40Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43584/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43584",
+    "labels": [],
+    "merged": true,
+    "number": 43584,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix mistral checkpoint loading in `utils/fetch_hub_objects_for_ci.py`: avoid too many requests and/or timeout",
+    "updated_at": "2026-01-29T11:29:13Z"
+  },
+  {
+    "additions": 3,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? `torch.histc` with deterministic algorithms enabled behaves differently across devices: **CPU**: only supports float input **CUDA**: only supports int input This PR updates `grouped_mm_experts_forward` to use the ap\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43583",
+    "created_at": "2026-01-29T09:02:59Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43583/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43583",
+    "labels": [],
+    "merged": true,
+    "number": 43583,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[MoE] Use int input for histc on CUDA to support deterministic algorithms",
+    "updated_at": "2026-01-30T11:39:52Z"
+  },
+  {
+    "additions": 14,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "- quantization: @SunMarc @MekkCyber",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43581",
+    "created_at": "2026-01-29T07:45:14Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43581/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43581",
+    "labels": [],
+    "merged": true,
+    "number": 43581,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "fix fbgemm fp8 multi-device load failure.",
+    "updated_at": "2026-02-10T14:51:49Z"
+  },
+  {
+    "additions": 2,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "We meet 2 failed test cases for lighton_ocr model: ``` tests/models/lighton_ocr/test_modeling_lighton_ocr.py::LightOnOcrForConditionalGenerationModelTest::test_torch_export tests/models/lighton_ocr/test_modeling_lighton_ocr.py::LightOnOcrF\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43580",
+    "created_at": "2026-01-29T07:24:34Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43580/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43580",
+    "labels": [],
+    "merged": false,
+    "number": 43580,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "skip torch export tests for lighton_ocr model",
+    "updated_at": "2026-04-13T02:40:18Z"
+  },
+  {
+    "additions": 13,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Hi @ydshieh , please help review this PR, thanks!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43579",
+    "created_at": "2026-01-29T07:19:36Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43579/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43579",
+    "labels": [],
+    "merged": true,
+    "number": 43579,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add XPU support to the tests for solar_open",
+    "updated_at": "2026-01-29T13:18:47Z"
+  },
+  {
+    "additions": 6,
+    "author": "LuJunru",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43578",
+    "created_at": "2026-01-29T06:52:33Z",
+    "deletions": 15,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43578/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43578",
+    "labels": [],
+    "merged": true,
+    "number": 43578,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Update test of Youtu-LLM to pr-aligned repos",
+    "updated_at": "2026-01-29T08:47:09Z"
+  },
+  {
+    "additions": 104,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? In the latest version of transformers, when initializing siglip with ZeRO3 applied, the following error occurs: ```python Fan in and fan out can not be computed for tensor with fewer than 2 dimensions File \"/usr/loc\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43574",
+    "created_at": "2026-01-29T01:26:40Z",
+    "deletions": 105,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43574/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43574",
+    "labels": [],
+    "merged": true,
+    "number": 43574,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Preventing initialization of siglip's lecun_normal_, default_flax_embed_init in ZeRO3",
+    "updated_at": "2026-02-20T07:32:11Z"
+  },
+  {
+    "additions": 14,
+    "author": "tobyliu2004",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #43572 Adds the missing `pad_token_id` parameter to `StableLmConfig` to resolve the `AttributeError` when creating StableLM models from config. ## Changes made: - Added `pad_token_id` parameter to `__init__`\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43573",
+    "created_at": "2026-01-29T01:10:03Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43573/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43573",
+    "labels": [],
+    "merged": false,
+    "number": 43573,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: Add missing pad_token_id to StableLmConfig",
+    "updated_at": "2026-01-29T18:17:40Z"
+  },
+  {
+    "additions": 2,
+    "author": "DowellHd",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR adds GIT to the auto-model mapping for the visual-question-answering pipeline and updates the corresponding GIT pipeline test mapping. This ensures that GIT models (e.g. GitForCausalLM) are correctly recogni\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43571",
+    "created_at": "2026-01-28T21:45:00Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43571/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43571",
+    "labels": [],
+    "merged": false,
+    "number": 43571,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add GIT support to visual-question-answering pipeline",
+    "updated_at": "2026-01-30T13:09:56Z"
+  },
+  {
+    "additions": 50,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Use git checkout command instead github api (otherwise we can't push the changes)",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43570",
+    "created_at": "2026-01-28T17:29:16Z",
+    "deletions": 87,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43570/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43570",
+    "labels": [],
+    "merged": true,
+    "number": 43570,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix repo. consistency bot (push permission issue)",
+    "updated_at": "2026-01-28T17:38:25Z"
+  },
+  {
+    "additions": 509,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "This PR refactors all of the logic linked to the inputs and outputs of continuous batching into a new class `ContinuousBatchingIOs` itself in a new file. This will be quite useful when we introduce async scheduling, because there will be t\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43569",
+    "created_at": "2026-01-28T17:28:49Z",
+    "deletions": 416,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43569/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43569",
+    "labels": [],
+    "merged": true,
+    "number": 43569,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[CB] Refactor logic for inputs and outputs outside of the main API",
+    "updated_at": "2026-02-02T12:43:28Z"
+  },
+  {
+    "additions": 410,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR makes it easier to go through TrainingArguments ! This should help making trainer less bloated for the users. A nice first step to make Trainer simpler to use.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43568",
+    "created_at": "2026-01-28T17:07:41Z",
+    "deletions": 416,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43568/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43568",
+    "labels": [],
+    "merged": true,
+    "number": 43568,
+    "review_comments_count": 19,
+    "state": "closed",
+    "title": "Simplify TrainingArguments docstring",
+    "updated_at": "2026-02-03T12:52:10Z"
+  },
+  {
+    "additions": 73,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Kind of a follow up to #43556 which fixed a bunch of wrong inheritance with gradient ckpting. Just dummy exchanged the proper flags and I didn't have to skip a lot of these tests",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43567",
+    "created_at": "2026-01-28T16:48:38Z",
+    "deletions": 86,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43567/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43567",
+    "labels": [],
+    "merged": true,
+    "number": 43567,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`Sam`] Fixup training flags",
+    "updated_at": "2026-02-02T11:12:21Z"
+  },
+  {
+    "additions": 28,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? https://github.com/huggingface/transformers/pull/43523 broke Wav2Vec and a few others as they have their own `tie_weights`, which is actually not tying the weights... Who would have thought that changing the purpose\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43566",
+    "created_at": "2026-01-28T16:47:58Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43566/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43566",
+    "labels": [],
+    "merged": true,
+    "number": 43566,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix Wav2vec and a few others",
+    "updated_at": "2026-01-28T17:08:53Z"
+  },
+  {
+    "additions": 1,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Try if the permission is correctly set.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43565",
+    "created_at": "2026-01-28T15:51:09Z",
+    "deletions": 19,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43565/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43565",
+    "labels": [],
+    "merged": true,
+    "number": 43565,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "check/fix repo. check bot workflow",
+    "updated_at": "2026-01-28T16:00:19Z"
+  },
+  {
+    "additions": 26,
+    "author": "ndeybach",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Avoids flooding the tty when using Sam3VideoModel ( when using propagate_in_video_iterator() ) harmonize argument with other propagate_in_video_iterator in other similar classes (EdgeTamVideoModel, Sam2VideoModel, S\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43564",
+    "created_at": "2026-01-28T15:45:54Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43564/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43564",
+    "labels": [],
+    "merged": true,
+    "number": 43564,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add an option to disable Sam3VideoModel progress bar",
+    "updated_at": "2026-01-28T16:20:05Z"
+  },
+  {
+    "additions": 9,
+    "author": "ydshieh2",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43563",
+    "created_at": "2026-01-28T15:20:22Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43563/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43563",
+    "labels": [],
+    "merged": false,
+    "number": 43563,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[don't merge] check style bot from fored repo",
+    "updated_at": "2026-01-28T18:41:26Z"
+  },
+  {
+    "additions": 9,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "This is a draft / debug PR to check why the CI is red. No need to review.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43562",
+    "created_at": "2026-01-28T15:12:31Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43562/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43562",
+    "labels": [],
+    "merged": false,
+    "number": 43562,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Debug] [Draft] Investigating red CI",
+    "updated_at": "2026-01-28T17:39:37Z"
+  },
+  {
+    "additions": 0,
+    "author": "vkuzo",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Summary: This is being deprecated: https://github.com/pytorch/ao/issues/3739 Test Plan: ```python RUN_SLOW=1 pytest tests/quantization/torchao_integration/test_torchao.py -s ``` # What does this PR do? <!-- Congratulations! You've made it\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43561",
+    "created_at": "2026-01-28T15:04:44Z",
+    "deletions": 104,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43561/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43561",
+    "labels": [],
+    "merged": true,
+    "number": 43561,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "remove torchao.autoquant from transformers",
+    "updated_at": "2026-02-02T11:07:11Z"
+  },
+  {
+    "additions": 2,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "We switched from `requests`, with no timeout by default, to `httpx`, which does have a read timeout by default. This causes some timeout errors in the CI, so this PR increases the timeout length by following the snippet [here](https://gith\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43560",
+    "created_at": "2026-01-28T14:42:21Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43560/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43560",
+    "labels": [],
+    "merged": true,
+    "number": 43560,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Increase timeout when preparing CI",
+    "updated_at": "2026-01-28T14:54:33Z"
+  },
+  {
+    "additions": 8,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Allow the encoder of T5Gemma2 to be loaded standalone ### Details This is valuable for Sentence Transformers, which may want to load the encoder only (see https://github.com/huggingface/sentence-transformers/pull/\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43559",
+    "created_at": "2026-01-28T14:41:34Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43559/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43559",
+    "labels": [],
+    "merged": true,
+    "number": 43559,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "[`feat`] Allow loading T5Gemma2Encoder with AutoModel",
+    "updated_at": "2026-02-03T22:21:11Z"
+  },
+  {
+    "additions": 9,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43558",
+    "created_at": "2026-01-28T14:08:17Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43558/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43558",
+    "labels": [],
+    "merged": false,
+    "number": 43558,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "check style bot",
+    "updated_at": "2026-01-30T14:38:00Z"
+  },
+  {
+    "additions": 2,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This test was taking 3 to 4mn on the CI `test_voxtral_tokenizer_converts_from_tekken` https://github.com/huggingface/transformers/blob/main/tests/models/auto/test_tokenization_auto.py#L213 Turns out we have a super\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43557",
+    "created_at": "2026-01-28T14:06:09Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43557/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43557",
+    "labels": [],
+    "merged": true,
+    "number": 43557,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "fix(converter): speed up `MistralConverter.extract_vocab_merges_from_model`",
+    "updated_at": "2026-01-29T09:15:11Z"
+  },
+  {
+    "additions": 51,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, it is an edge case for lasr only at the moment but #41212 will also need it. I think this is the easiest solution (and fastest) because there are several edge cases with additional bases.",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43556",
+    "created_at": "2026-01-28T13:57:52Z",
+    "deletions": 44,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43556/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43556",
+    "labels": [],
+    "merged": true,
+    "number": 43556,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "[`Modular`] Allow to add new bases that are not present in the inherited class",
+    "updated_at": "2026-01-28T16:33:01Z"
+  },
+  {
+    "additions": 1,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43555",
+    "created_at": "2026-01-28T13:55:17Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43555/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43555",
+    "labels": [],
+    "merged": true,
+    "number": 43555,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "check PR bot permission - part 3 (try content attribute)",
+    "updated_at": "2026-01-28T14:04:54Z"
+  },
+  {
+    "additions": 1,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43554",
+    "created_at": "2026-01-28T13:49:19Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43554/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43554",
+    "labels": [],
+    "merged": true,
+    "number": 43554,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "check PR bot permission - part 2 (style only)",
+    "updated_at": "2026-01-28T13:50:00Z"
+  },
+  {
+    "additions": 18,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43553",
+    "created_at": "2026-01-28T13:41:21Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43553/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43553",
+    "labels": [],
+    "merged": true,
+    "number": 43553,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "check PR bot permission - part 1",
+    "updated_at": "2026-01-28T13:50:48Z"
+  },
+  {
+    "additions": 173,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "**Summary** This PR improves GPU monitoring performance by switching from threading to multiprocessing and using the amdsmi Python library instead of rocm-smi subprocess calls for AMD GPUs. **Changes** - Threading \u2192 Multiprocessing: GPU mo\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43552",
+    "created_at": "2026-01-28T13:35:17Z",
+    "deletions": 72,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43552/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43552",
+    "labels": [],
+    "merged": true,
+    "number": 43552,
+    "review_comments_count": 9,
+    "state": "closed",
+    "title": "Improve GPU monitoring: switch to multiprocessing and use amdsmi for AMD GPUs",
+    "updated_at": "2026-01-29T09:12:50Z"
+  },
+  {
+    "additions": 2,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? \u2192 Fix SDPA `torch.compile` failure in [Bamba-9B-v2](https://huggingface.co/ibm-ai-platform/Bamba-9B-v2). Fixes #43550. ### Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the oth\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43551",
+    "created_at": "2026-01-28T11:27:56Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43551/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43551",
+    "labels": [],
+    "merged": false,
+    "number": 43551,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(models): Bamba model fails with torch.compile when using SDPA",
+    "updated_at": "2026-04-18T08:37:56Z"
+  },
+  {
+    "additions": 15,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Before we were just silently skipping parameters that are passed by the user like `s_aux` in case they are not supported by the attention backend specified, it would be better to raise an exception instead. cc @dani\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43549",
+    "created_at": "2026-01-28T10:41:55Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43549/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43549",
+    "labels": [],
+    "merged": false,
+    "number": 43549,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "[kernels] exception handling for fa kernels",
+    "updated_at": "2026-01-28T14:52:46Z"
+  },
+  {
+    "additions": 9,
+    "author": "tomaszcichy98",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a bug where `Qwen2VLImageProcessorFast` doesn't set `min_pixels` and `max_pixels` instance attributes, breaking compatibility with code that expects these attributes. ## The Problem The slow processor (`Qwen2\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43548",
+    "created_at": "2026-01-28T10:35:52Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43548/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43548",
+    "labels": [],
+    "merged": false,
+    "number": 43548,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Qwen2VL] Fix missing min_pixels/max_pixels attributes in fast image processor",
+    "updated_at": "2026-01-29T20:12:47Z"
+  },
+  {
+    "additions": 14,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "\u2026gemmFp8Test::test_change_loading_attributes - quantization: @SunMarc @MekkCyber input_scale_ub is not correctly updated, since replace_with_fbgemm_fp8_linear is called under meta device.",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43547",
+    "created_at": "2026-01-28T08:37:08Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43547/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43547",
+    "labels": [],
+    "merged": true,
+    "number": 43547,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix the error of tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py::Fb\u2026",
+    "updated_at": "2026-02-10T14:36:36Z"
+  },
+  {
+    "additions": 0,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Script: ```python import torch from transformers import pipeline from datasets import load_dataset device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\" pipe = pipeline( \"automatic-speech-recognition\", model=\"openai/whisper-small\", dev\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43546",
+    "created_at": "2026-01-28T07:44:11Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43546/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43546",
+    "labels": [],
+    "merged": true,
+    "number": 43546,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Remove `num_frames` in ASR pipeline",
+    "updated_at": "2026-04-20T02:29:37Z"
+  },
+  {
+    "additions": 5,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "- quantization: @SunMarc @MekkCyber should we remove such test. or else will throw error FAILED tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py::FbgemmFp8LinearTest::test_linear_preserves_shape - NameError: name 'quantize_fp8_per_row' is\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43545",
+    "created_at": "2026-01-28T06:40:55Z",
+    "deletions": 36,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43545/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43545",
+    "labels": [],
+    "merged": true,
+    "number": 43545,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "remove FbgemmFp8LinearTest",
+    "updated_at": "2026-01-29T12:37:16Z"
+  },
+  {
+    "additions": 1,
+    "author": "Olexandr88",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43544",
+    "created_at": "2026-01-28T06:28:03Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43544/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43544",
+    "labels": [],
+    "merged": true,
+    "number": 43544,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix typos in add_new_model_like docstrings",
+    "updated_at": "2026-03-11T13:58:05Z"
+  },
+  {
+    "additions": 22,
+    "author": "ITcarrot",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 22,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43543",
+    "created_at": "2026-01-28T04:38:01Z",
+    "deletions": 22,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43543/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43543",
+    "labels": [],
+    "merged": false,
+    "number": 43543,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix fp16 underflow in MoE load balancing loss by enforcing fp32 softmax",
+    "updated_at": "2026-01-28T04:39:00Z"
+  },
+  {
+    "additions": 16,
+    "author": "ITcarrot",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR fixes a bug in the router implementation of several MoE models (Qwen Moe like models, `Olmoe`, `FlexOlmo`). Previously, the raw `router_logits` were being overwritten by the result of the softmax operation:\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43542",
+    "created_at": "2026-01-28T04:07:48Z",
+    "deletions": 16,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43542/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43542",
+    "labels": [],
+    "merged": false,
+    "number": 43542,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix: output router capture wrong router logits in qwen moe models",
+    "updated_at": "2026-01-28T10:25:19Z"
+  },
+  {
+    "additions": 4,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Llama4 look for `pad_token_id` on `self.config` in some cases, but I think it actually lives on `self.config.text_config`. This PR should fix things! There was a similar issue with Qwen3, but thankfully I couldn't find any other affected m\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43539",
+    "created_at": "2026-01-27T18:13:11Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43539/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43539",
+    "labels": [],
+    "merged": true,
+    "number": 43539,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Look for the pad_token_id in the right place for Llama4",
+    "updated_at": "2026-02-09T17:24:21Z"
+  },
+  {
+    "additions": 2455,
+    "author": "lashahub",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR adds support for **Music Flamingo**, NVIDIA's open large audio-language model designed for deep music understanding and reasoning. - **Paper**: [Music Flamingo: Scaling Music Understanding in Audio Language Models](https://huggingf\u2026",
+    "changed_files": 28,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 35,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43538",
+    "created_at": "2026-01-27T17:37:34Z",
+    "deletions": 73,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43538/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43538",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": true,
+    "number": 43538,
+    "review_comments_count": 102,
+    "state": "closed",
+    "title": "Add Music Flamingo",
+    "updated_at": "2026-04-04T20:28:56Z"
+  },
+  {
+    "additions": 125,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "We get flaky generation tests in the CI a lot. A very common cause is the `has_similar_generate_outputs` helper function - this function is called by many tests, but it has very narrow tolerances by default in the CI (`atol=1e-5, rtol=1e-5\u2026",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43537",
+    "created_at": "2026-01-27T17:34:58Z",
+    "deletions": 74,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43537/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43537",
+    "labels": [],
+    "merged": true,
+    "number": 43537,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Operation Green CI II",
+    "updated_at": "2026-01-29T18:49:23Z"
+  },
+  {
+    "additions": 73,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The dependencies and extras have almost never been touched and are quite old. We are suffering quite a bit from the following: - duplicated dependencies (e.g. being still in extra etc even if they are main dependenc\u2026",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43536",
+    "created_at": "2026-01-27T17:15:28Z",
+    "deletions": 3600,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43536/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43536",
+    "labels": [],
+    "merged": true,
+    "number": 43536,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Rework dependencies and extras + Remove outdated `templates` folder",
+    "updated_at": "2026-01-28T17:46:10Z"
+  },
+  {
+    "additions": 2,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "#43530 fails because the test I wrote for verifying correct downloads fails on image files, which might have UTF-8 illegal bytes. Opening the file in `b` mode fixes it. This doesn't show up in the CI because the script is run when recreati\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43535",
+    "created_at": "2026-01-27T17:09:31Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43535/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43535",
+    "labels": [],
+    "merged": true,
+    "number": 43535,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix for #43530",
+    "updated_at": "2026-01-27T17:21:28Z"
+  },
+  {
+    "additions": 17,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "GPU monitoring causes ~2-3x slowdown on AMD GPUs during benchmark runs. This PR automatically disables GPU monitoring when an AMD GPU is detected, with a warning message explaining the reason. Users can still force-enable GPU monitoring wi\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43534",
+    "created_at": "2026-01-27T16:51:26Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43534/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43534",
+    "labels": [],
+    "merged": false,
+    "number": 43534,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Benchmark] Disable GPU monitoring by default on AMD GPUs",
+    "updated_at": "2026-01-29T09:54:57Z"
+  },
+  {
+    "additions": 6,
+    "author": "Codalorian",
+    "author_association": "NONE",
+    "body_excerpt": "Phi functionality working very well. Llama models would probably be the next step to take.",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43533",
+    "created_at": "2026-01-27T16:51:08Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43533/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43533",
+    "labels": [],
+    "merged": false,
+    "number": 43533,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add LBNet functionality to Llama models next",
+    "updated_at": "2026-01-27T16:52:10Z"
+  },
+  {
+    "additions": 88,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43532",
+    "created_at": "2026-01-27T16:32:01Z",
+    "deletions": 31,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43532/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43532",
+    "labels": [],
+    "merged": false,
+    "number": 43532,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "[don't merge] Show diff",
+    "updated_at": "2026-01-28T08:36:34Z"
+  },
+  {
+    "additions": 89,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "This is the PR where I just keep patching things until it's green and then I merge",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43530",
+    "created_at": "2026-01-27T14:27:58Z",
+    "deletions": 17,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43530/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43530",
+    "labels": [],
+    "merged": true,
+    "number": 43530,
+    "review_comments_count": 14,
+    "state": "closed",
+    "title": "Operation Green CI",
+    "updated_at": "2026-01-27T16:50:42Z"
+  },
+  {
+    "additions": 1,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? We are standardizing kernel names in `kernels-community` to use `-` instead of `_`, this pr simply updates `cv_utils` new kernel is here with the latest torch version 2.10: https://huggingface.co/kernels-community/c\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43529",
+    "created_at": "2026-01-27T13:56:00Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43529/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43529",
+    "labels": [],
+    "merged": true,
+    "number": 43529,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[kernels] Update cv_utils name",
+    "updated_at": "2026-01-27T15:33:10Z"
+  },
+  {
+    "additions": 2,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? change `triton_kernels` name to `gpt-oss-triton-kernels`, no new failling tests related to this change",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43528",
+    "created_at": "2026-01-27T13:44:57Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43528/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43528",
+    "labels": [],
+    "merged": true,
+    "number": 43528,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Quantization] Fix triton_kernels name after being renamed to gpt-oss-triton-kernels",
+    "updated_at": "2026-01-28T10:45:35Z"
+  },
+  {
+    "additions": 30,
+    "author": "sbucaille",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #43526 ## Who can review? @yonigozlan @molbap",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43527",
+    "created_at": "2026-01-27T13:29:06Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43527/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43527",
+    "labels": [],
+    "merged": true,
+    "number": 43527,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: return labels instead of label in reduce_label method in BeitImageProcessorFast",
+    "updated_at": "2026-01-28T01:05:25Z"
+  },
+  {
+    "additions": 25,
+    "author": "kashif",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixed the `_apply_weight_conversions_to_state_dict` function to properly handle MoE weight conversions when the renamed key exists in model_state_dict. The bug was that when renamed_key (e.g., `gate_up_proj`) was fo\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43524",
+    "created_at": "2026-01-27T11:29:22Z",
+    "deletions": 11,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43524/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43524",
+    "labels": [],
+    "merged": true,
+    "number": 43524,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[DeepSpeed] properly handle MoE weight conversion",
+    "updated_at": "2026-02-10T19:24:06Z"
+  },
+  {
+    "additions": 52,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/43522. TLDR we want to skip tying when inside `from_pretrained` (so we add the context manager), but always tie when initializing from config (even with meta de\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43523",
+    "created_at": "2026-01-27T10:53:08Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43523/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43523",
+    "labels": [],
+    "merged": true,
+    "number": 43523,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Tie the weights even if initializing from a config on meta device",
+    "updated_at": "2026-01-27T15:45:24Z"
+  },
+  {
+    "additions": 228,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "# Summary This PR introduces minor changes to the continuous batching feature: #### Performance - optimize the `get_seqlens_k` function to avoid looping over all cache managers - logits indexing is now done inside the cuda graph, as there\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43521",
+    "created_at": "2026-01-27T10:07:50Z",
+    "deletions": 169,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43521/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43521",
+    "labels": [],
+    "merged": true,
+    "number": 43521,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "[CB] Minor perf improvements and ty compatibility",
+    "updated_at": "2026-01-28T10:39:49Z"
+  },
+  {
+    "additions": 19,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Commit 8dd9c999a6262d6ceb48f4a2da7acaccfa80e3bc introduced a regression by unconditionally reinitializing BatchNorm2d buffers (running_mean, running_var, num_batches_tracked) in the _init_weights() method. The probl\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43520",
+    "created_at": "2026-01-27T08:51:41Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43520/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43520",
+    "labels": [],
+    "merged": true,
+    "number": 43520,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix: initialize BatchNorm2d buffers only on meta",
+    "updated_at": "2026-01-27T13:28:29Z"
+  },
+  {
+    "additions": 6,
+    "author": "ariG23498",
+    "author_association": "MEMBER",
+    "body_excerpt": "CC: @MekkCyber",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43518",
+    "created_at": "2026-01-27T07:23:19Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43518/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43518",
+    "labels": [],
+    "merged": true,
+    "number": 43518,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Kernels] kernel migration updates for activation kernels",
+    "updated_at": "2026-02-04T08:48:42Z"
+  },
+  {
+    "additions": 31,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? When using that sp(sequence parallel) in evaluate, the sequence length must be divisible by `world_size`, and the eval batch size must be the same as the train batch size to be able to use it, and this is a PR to mo\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43517",
+    "created_at": "2026-01-27T07:17:42Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43517/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43517",
+    "labels": [],
+    "merged": true,
+    "number": 43517,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Make it possible to evaluate when using sequence parallel in HF Trainer",
+    "updated_at": "2026-02-20T07:32:13Z"
+  },
+  {
+    "additions": 18,
+    "author": "yousheng-chen",
+    "author_association": "NONE",
+    "body_excerpt": null,
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43516",
+    "created_at": "2026-01-27T04:13:32Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43516/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43516",
+    "labels": [],
+    "merged": false,
+    "number": 43516,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Hub creat branch",
+    "updated_at": "2026-01-27T04:13:52Z"
+  },
+  {
+    "additions": 17,
+    "author": "yousheng-chen",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43515",
+    "created_at": "2026-01-27T03:43:11Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43515/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43515",
+    "labels": [],
+    "merged": false,
+    "number": 43515,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "New branch",
+    "updated_at": "2026-01-27T06:58:27Z"
+  },
+  {
+    "additions": 33586,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# Image Processor Backend Refactor ## Summary Replaces the dual-file `BaseImageProcessor` (slow/PIL) + `BaseImageProcessorFast` (fast/torchvision) design with a unified backend architecture. The `image_processing_utils_fast` module is remo\u2026",
+    "changed_files": 675,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43514",
+    "created_at": "2026-01-27T03:32:12Z",
+    "deletions": 57296,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43514/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43514",
+    "labels": [],
+    "merged": true,
+    "number": 43514,
+    "review_comments_count": 62,
+    "state": "closed",
+    "title": "\ud83d\udea8\ud83d\udea8 Refactor Image Processors to support different backends",
+    "updated_at": "2026-03-19T14:47:57Z"
+  },
+  {
+    "additions": 36,
+    "author": "heathdutton",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "For DETR-derived models that use sigmoid/focal loss (Deformable DETR, Grounding DINO, LW-DETR, RT-DETR, D-FINE), the cardinality error calculation was incorrect. These models don't have an explicit background class, so checking `argmax(-1)\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43513",
+    "created_at": "2026-01-27T02:30:53Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43513/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43513",
+    "labels": [],
+    "merged": true,
+    "number": 43513,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix cardinality error for DETR models without explicit background class",
+    "updated_at": "2026-02-09T17:30:43Z"
+  },
+  {
+    "additions": 83,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "CPU mxfp4 moe kernel already implemented. Fix the check to enable mxfp4 on CPU. Waiting for CPU kernel merge and release.",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43512",
+    "created_at": "2026-01-27T02:14:48Z",
+    "deletions": 42,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43512/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43512",
+    "labels": [],
+    "merged": true,
+    "number": 43512,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "Enable mxfp4 model on CPU",
+    "updated_at": "2026-04-20T02:29:38Z"
+  },
+  {
+    "additions": 1,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "\u2026onTest::test_inference_mask_generation_batched_points_batched_images pass in xpu @ydshieh",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43511",
+    "created_at": "2026-01-27T01:27:43Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43511/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43511",
+    "labels": [],
+    "merged": true,
+    "number": 43511,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update `SamHQModelIntegrationTest::test_inference_mask_generation_batched_points_batched_images` for `XPU`",
+    "updated_at": "2026-01-27T09:09:48Z"
+  },
+  {
+    "additions": 18,
+    "author": "pstjohn",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR fixes and re-enables the skipped `extra_state` tests, since we use this functionality when embedding NVIDIA TransformerEngine layers in PreTrainedModels. Thanks! @ArthurZucker for review",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43510",
+    "created_at": "2026-01-26T23:57:17Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43510/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43510",
+    "labels": [],
+    "merged": true,
+    "number": 43510,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix and re-enable extra_state tests",
+    "updated_at": "2026-03-09T12:05:30Z"
+  },
+  {
+    "additions": 9,
+    "author": "oliverholworthy",
+    "author_association": "NONE",
+    "body_excerpt": "This PR restores compatibility with PyTorch versions < 2.4 while preserving the current behavior on newer versions. `torch.is_autocast_enabled(device_type)` was introduced in PyTorch 2.4, but Transformers currently supports torch>=2.2. On\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43509",
+    "created_at": "2026-01-26T21:45:32Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43509/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43509",
+    "labels": [],
+    "merged": false,
+    "number": 43509,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Guard torch.is_autocast_enabled(device_type) for torch<2.4",
+    "updated_at": "2026-01-30T13:25:33Z"
+  },
+  {
+    "additions": 30,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? PR #42845 fails some workflow runs ``` File \"/home/runner/work/transformers/transformers/utils/get_ci_error_statistics.py\", line 29, in get_jobs result = httpx.get(url + f\"&page={i + 2}\", headers=headers).json() Fil\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43507",
+    "created_at": "2026-01-26T20:01:30Z",
+    "deletions": 30,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43507/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43507",
+    "labels": [],
+    "merged": true,
+    "number": 43507,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Revert utils files changes from db1e6f1e",
+    "updated_at": "2026-01-26T20:50:06Z"
+  },
+  {
+    "additions": 1261,
+    "author": "reach-Harishapc",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "- Implement RishAIModel, RishAICausalLM with proper inheritance - Add RishAIConfig with full MoE and attention parameters - Integrate RishAITokenizer with BPE support - 100% test coverage with comprehensive test suite - Compatible with tra\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43506",
+    "created_at": "2026-01-26T19:38:43Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43506/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43506",
+    "labels": [],
+    "merged": false,
+    "number": 43506,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add RishAI model with full transformers integration",
+    "updated_at": "2026-01-27T19:52:25Z"
+  },
+  {
+    "additions": 4,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? \u2192 Add backward compat for legacy `segmentation_indices` field in `BeitConfig`. \u2192 Fix the conversion script to set `out_indices` for base models; should the model checkpoints be re-exported after this PR. \u2192 Improve\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43505",
+    "created_at": "2026-01-26T18:34:05Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43505/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43505",
+    "labels": [],
+    "merged": true,
+    "number": 43505,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix(models): Migrate legacy segmentation_indices to out_indices in BeitConfig",
+    "updated_at": "2026-02-23T10:08:28Z"
+  },
+  {
+    "additions": 59,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Allow it to do a subset of simpler fixes instead a full set.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43503",
+    "created_at": "2026-01-26T16:50:27Z",
+    "deletions": 33,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43503/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43503",
+    "labels": [],
+    "merged": true,
+    "number": 43503,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Enhance repo consistence bot",
+    "updated_at": "2026-01-26T17:13:34Z"
+  },
+  {
+    "additions": 121,
+    "author": "mbtariq82",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43501",
+    "created_at": "2026-01-26T16:18:02Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43501/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43501",
+    "labels": [],
+    "merged": false,
+    "number": 43501,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Added tokenizer encoder",
+    "updated_at": "2026-01-26T16:43:03Z"
+  },
+  {
+    "additions": 10,
+    "author": "ydshieh2",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "[don't merge] check bot permission",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43500",
+    "created_at": "2026-01-26T15:12:19Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43500/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43500",
+    "labels": [],
+    "merged": false,
+    "number": 43500,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[don't merge] check bot permission",
+    "updated_at": "2026-01-28T15:11:49Z"
+  },
+  {
+    "additions": 12,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "Adds the missing `from_dict()` classmethod to the `GPURawMetrics` dataclass in `benchmark_v2/framework/hardware_metrics.py`. (`BenchmarkResult.from_dict()` is used at line 144 of `data_classes.py`)",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43499",
+    "created_at": "2026-01-26T14:43:46Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43499/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43499",
+    "labels": [],
+    "merged": true,
+    "number": 43499,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add missing GPURawMetrics.from_dict() method in benchmark_v2",
+    "updated_at": "2026-01-26T15:52:58Z"
+  },
+  {
+    "additions": 11,
+    "author": "marcndo",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix backward compatibility issue for tie_weights <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set,\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43498",
+    "created_at": "2026-01-26T14:17:56Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43498/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43498",
+    "labels": [],
+    "merged": false,
+    "number": 43498,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix/backward compatibility for tie_weights",
+    "updated_at": "2026-01-26T16:29:08Z"
+  },
+  {
+    "additions": 3,
+    "author": "eldarkurtic",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Loading Llama-4 model with `Llama4ForConditionalGeneration` fails because `self.config.pad_token_id` doesn't exist. For Llama-4 models, `pad_token_id` is inside `text_config` not the general config.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43497",
+    "created_at": "2026-01-26T13:53:13Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43497/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43497",
+    "labels": [],
+    "merged": false,
+    "number": 43497,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Bugfix] Extract pad_token_id from text config for Llama-4",
+    "updated_at": "2026-02-10T09:13:12Z"
+  },
+  {
+    "additions": 89,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fixes the following regression: ```python from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(\"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2\") sentences = [\"C'est u\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43496",
+    "created_at": "2026-01-26T13:24:45Z",
+    "deletions": 65,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43496/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43496",
+    "labels": [],
+    "merged": false,
+    "number": 43496,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Revert tokenization regression, add test",
+    "updated_at": "2026-01-28T10:04:07Z"
+  },
+  {
+    "additions": 157,
+    "author": "leoneperdigao",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary <!-- Add a brief summary of changes --> ## Related Issue Fixes #43408 **Issue:** Warning: You are using a model of type sam3_video to instantiate a model of type sam3_tracker **URL:** https://github.com/huggingface/transformers/\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43495",
+    "created_at": "2026-01-26T12:46:21Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43495/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43495",
+    "labels": [],
+    "merged": true,
+    "number": 43495,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "fix: add compatible_model_types to suppress model type mismatch warnings",
+    "updated_at": "2026-02-05T13:31:24Z"
+  },
+  {
+    "additions": 20,
+    "author": "githubnemo",
+    "author_association": "MEMBER",
+    "body_excerpt": "The Qwen3 MoE config was missing the mapping attribute for the num_expert_local config variable which made it impossible to load FP8 quantized models, due to the following exception: ``` Traceback (most recent call last): File \".../exps/tr\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43494",
+    "created_at": "2026-01-26T11:34:05Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43494/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43494",
+    "labels": [],
+    "merged": true,
+    "number": 43494,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix loading of Qwen3 FP8",
+    "updated_at": "2026-01-27T09:56:23Z"
+  },
+  {
+    "additions": 54,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43492",
+    "created_at": "2026-01-26T10:30:53Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43492/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43492",
+    "labels": [],
+    "merged": false,
+    "number": 43492,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Perception Encoder follow up PR",
+    "updated_at": "2026-01-26T12:55:35Z"
+  },
+  {
+    "additions": 605,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "DRAFT FOR DISCUSSION # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great ti\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43491",
+    "created_at": "2026-01-26T10:30:51Z",
+    "deletions": 18,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43491/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43491",
+    "labels": [],
+    "merged": false,
+    "number": 43491,
+    "review_comments_count": 10,
+    "state": "closed",
+    "title": "Improved new model template",
+    "updated_at": "2026-02-24T11:44:40Z"
+  },
+  {
+    "additions": 539,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? It makes sure `extras` can be installed on all supported Python versions. - cleaned up extras (removed natten, tweaked mistral-common etc,) - adds a supported Python version range (10->14) - dynamically update the m\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43490",
+    "created_at": "2026-01-26T10:27:44Z",
+    "deletions": 50,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43490/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43490",
+    "labels": [],
+    "merged": true,
+    "number": 43490,
+    "review_comments_count": 21,
+    "state": "closed",
+    "title": "Fix extras on all supported Python versions",
+    "updated_at": "2026-01-30T15:14:55Z"
+  },
+  {
+    "additions": 10,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? check repo bot, don't merge",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43488",
+    "created_at": "2026-01-26T10:13:21Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43488/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43488",
+    "labels": [],
+    "merged": false,
+    "number": 43488,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[don't merge] bad format to check repo bot",
+    "updated_at": "2026-01-26T17:19:16Z"
+  },
+  {
+    "additions": 6,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "Resolves #43474 Resolves #43475 # What does this PR do? * Removes `fpn_position_embeddings`, should have been `fpn_position_encoding` all along. Affected 3 architectures, and was introduced in #42564 3 days ago. * Uses `...get_text_feature\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43487",
+    "created_at": "2026-01-26T10:08:59Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43487/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43487",
+    "labels": [],
+    "merged": true,
+    "number": 43487,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`fix`] Sam3Video: Avoid fpn_position_embedding; use pooler_output",
+    "updated_at": "2026-01-26T10:38:53Z"
+  },
+  {
+    "additions": 23,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, it was not working when input was a batched 5D array Fixes https://github.com/huggingface/transformers/issues/43450",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43486",
+    "created_at": "2026-01-26T09:54:12Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43486/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43486",
+    "labels": [],
+    "merged": true,
+    "number": 43486,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix `make_batched_video` with 5D arrays",
+    "updated_at": "2026-01-30T10:27:50Z"
+  },
+  {
+    "additions": 1,
+    "author": "LysandreJik",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43485",
+    "created_at": "2026-01-26T09:51:52Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43485/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43485",
+    "labels": [],
+    "merged": true,
+    "number": 43485,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Revise MIGRATION_GUIDE_V5.md for version 5 updates",
+    "updated_at": "2026-01-26T10:01:11Z"
+  },
+  {
+    "additions": 70,
+    "author": "raimbekovm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Optimizes timestamp rendering in Ernie 4.5 VL video processing by caching text overlays and using torch alpha blending instead of slow `torch->PIL->torch` conversion for each frame. **Performance improvement:** | Mo\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43484",
+    "created_at": "2026-01-26T09:40:50Z",
+    "deletions": 25,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43484/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43484",
+    "labels": [],
+    "merged": false,
+    "number": 43484,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Optimize Ernie 4.5 VL timestamp rendering with cached overlays",
+    "updated_at": "2026-01-26T09:41:54Z"
+  },
+  {
+    "additions": 36,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Uses bucket=module to randomize tests within files, compatible with --dist=loadfile. CI uses CIRCLE_BUILD_NUM as seed for reproducibility across parallel containers. Local runs use random seed to catch order depende\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43483",
+    "created_at": "2026-01-26T08:34:02Z",
+    "deletions": 27,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43483/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43483",
+    "labels": [],
+    "merged": true,
+    "number": 43483,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Add pytest-random-order for reproducible test randomization",
+    "updated_at": "2026-01-26T16:02:04Z"
+  },
+  {
+    "additions": 2,
+    "author": "raimbekovm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a bug where `vision_eager_attention_forward` ignored the passed `scaling` parameter and used `module.head_dim**-0.5` instead. This causes incorrect attention scores under Tensor Parallelism (TP) where head dim\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43481",
+    "created_at": "2026-01-26T05:55:08Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43481/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43481",
+    "labels": [],
+    "merged": false,
+    "number": 43481,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix Llama4 vision attention scaling for TP compatibility",
+    "updated_at": "2026-01-27T09:52:18Z"
+  },
+  {
+    "additions": 4,
+    "author": "charlieJ107",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## \ud83d\udc1b Bug Fix: Phi4MultimodalConfig default sub-config initialization This PR fixes two issues in `Phi4MultimodalConfig.__init__` related to default initialization of multimodal sub-configs. Rations in Phi4MultimodalConfig # What does this\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43480",
+    "created_at": "2026-01-26T01:32:08Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43480/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43480",
+    "labels": [],
+    "merged": true,
+    "number": 43480,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(Phi4Multimodal): Fix incorrect default vision/audio config initialization in Phi4MultimodalConfig",
+    "updated_at": "2026-01-26T14:02:23Z"
+  },
+  {
+    "additions": 139,
+    "author": "karthikthota-03",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "**Related Issue** Fixes #43472 **Overview** This PR is to refactor the expert implementation in Qwen2Moe by introducing a standardized BatchLinear utility. The current implementation of Qwen2MoeExperts relies on a custom expert registry an\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43478",
+    "created_at": "2026-01-25T16:26:24Z",
+    "deletions": 45,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43478/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43478",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43478,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor: use BatchLinear in Qwen2Moe to enable PEFT/LoRA support",
+    "updated_at": "2026-01-26T13:31:10Z"
+  },
+  {
+    "additions": 2,
+    "author": "raimbekovm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Remove outdated TODO comments claiming patch embedding weight tying is \"not working\". ## Details Testing confirms the tying mechanism works correctly: - `patch_embed.proj.weight` and `encoder.embed_patches.proj.weight` share the\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43477",
+    "created_at": "2026-01-25T13:30:13Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43477/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43477",
+    "labels": [],
+    "merged": true,
+    "number": 43477,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Remove stale TODO comments in UDOP tied weights",
+    "updated_at": "2026-01-26T14:36:01Z"
+  },
+  {
+    "additions": 39,
+    "author": "Aakash0440",
+    "author_association": "NONE",
+    "body_excerpt": "When passing a batched tensor (B x T x C x H x W) directly to AutoVideoProcessor, the processor incorrectly adds extra singleton dimensions, returning a tensor of shape: 1 x 1 x B x T x C x H x W instead of the expected: B x T x C x H x W\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43476",
+    "created_at": "2026-01-25T09:28:44Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43476/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43476",
+    "labels": [],
+    "merged": false,
+    "number": 43476,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: AutoVideoProcessor returns incorrect shape for batched tensor inputs (#43450)",
+    "updated_at": "2026-01-26T13:22:04Z"
+  },
+  {
+    "additions": 2,
+    "author": "xenova",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43473",
+    "created_at": "2026-01-25T03:31:50Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43473/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43473",
+    "labels": [],
+    "merged": true,
+    "number": 43473,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix Apertus model loading (NotImplementedError: Cannot copy out of meta tensor; no data!)",
+    "updated_at": "2026-02-03T09:50:39Z"
+  },
+  {
+    "additions": 37,
+    "author": "antznette1",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Improve the UX of `transformers serve` when serving dependencies are not installed by raising an actionable `ImportError` that lists the missing packages and provides copy/pastable installation commands. ## What changed - Update\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43471",
+    "created_at": "2026-01-25T01:29:41Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43471/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43471",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43471,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "cli: Improve serve missing dependency error",
+    "updated_at": "2026-01-26T13:52:32Z"
+  },
+  {
+    "additions": 1,
+    "author": "xander1421",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary `MusicgenForConditionalGeneration` inherits from `MusicgenPreTrainedModel` which has `config_class` implicitly set to `MusicgenDecoderConfig` via the type annotation `config: MusicgenDecoderConfig`. However, `MusicgenForConditio\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43470",
+    "created_at": "2026-01-25T00:29:47Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43470/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43470",
+    "labels": [],
+    "merged": false,
+    "number": 43470,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix MusicgenForConditionalGeneration config_class inheritance",
+    "updated_at": "2026-02-10T19:50:23Z"
+  },
+  {
+    "additions": 17,
+    "author": "antznette1",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Improve HfArgumentParser boolean UX so `Optional[bool]` arguments can be passed as flags without an explicit value. ## What changed - Updated HfArgumentParser to treat `typing.Optional[bool]` `(Union[bool, None])` the same as `b\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43469",
+    "created_at": "2026-01-24T23:36:23Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43469/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43469",
+    "labels": [],
+    "merged": false,
+    "number": 43469,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "argparser: Allow optional bool flags without values",
+    "updated_at": "2026-01-26T15:29:00Z"
+  },
+  {
+    "additions": 99,
+    "author": "sherlock-488",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #35532 ### What does this PR do? - Adds `encode()` to `RagTokenizer` by forwarding to `current_tokenizer`. - Adds `patch_token` / `patch_token_id` getters and setters, also forwarding to `current_tokenizer`. - Adds a lightweight regr\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43468",
+    "created_at": "2026-01-24T21:20:28Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43468/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43468",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43468,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "RagTokenizer: add encode and patch_token(_id) forwarding",
+    "updated_at": "2026-03-21T05:45:45Z"
+  },
+  {
+    "additions": 3,
+    "author": "engmohamedsalah",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #43344 The integration tests for GLM-Image were failing because they were attempting to load the `zai-org/GLM-4.5V` checkpoint (a GLM-4V MoE model) using `GlmImageForConditionalGeneration` classes, causing ar\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43467",
+    "created_at": "2026-01-24T18:06:10Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43467/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43467",
+    "labels": [],
+    "merged": false,
+    "number": 43467,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix GLM-Image integration tests to use correct model class",
+    "updated_at": "2026-01-26T15:29:58Z"
+  },
+  {
+    "additions": 43,
+    "author": "raimbekovm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes the TODO in `loss_for_object_detection.py:196` - mask loss now properly ignores padding areas. When batching images of different sizes, `nested_tensor_from_tensor_list` pads smaller images. Previously, padding\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43466",
+    "created_at": "2026-01-24T17:40:23Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43466/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43466",
+    "labels": [],
+    "merged": false,
+    "number": 43466,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix mask loss to ignore padding areas in object detection",
+    "updated_at": "2026-01-24T17:50:39Z"
+  },
+  {
+    "additions": 6,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes #43452",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43465",
+    "created_at": "2026-01-24T17:34:36Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43465/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43465",
+    "labels": [],
+    "merged": true,
+    "number": 43465,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix gguf recent conversion issues",
+    "updated_at": "2026-01-24T18:07:17Z"
+  },
+  {
+    "additions": 1,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes failing [MarkupLMModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/21307980813/job/61339620056#step:14:1451). ## Before submitting - [ ] This PR fixes a typo or improves the docs (\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43464",
+    "created_at": "2026-01-24T17:08:50Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43464/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43464",
+    "labels": [],
+    "merged": true,
+    "number": 43464,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix failing `markuplm` integration tests",
+    "updated_at": "2026-01-26T16:24:07Z"
+  },
+  {
+    "additions": 4,
+    "author": "engmohamedsalah",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #42890 by addressing the **root cause**: a weight loading bug introduced in commit `0b369802cf`. ## Root Cause Commit `0b369802cf` (\"fix sam family!\") removed `_tied_weights_keys` because positional embedding\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43463",
+    "created_at": "2026-01-24T15:30:47Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43463/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43463",
+    "labels": [],
+    "merged": false,
+    "number": 43463,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix SAM-HQ weight loading bug causing flaky tests",
+    "updated_at": "2026-01-26T15:54:07Z"
+  },
+  {
+    "additions": 65,
+    "author": "lllangWV",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? The dataclass output attribute was named `fpn_position_encoding` but code in `_prepare_vision_features` was accessing `fpn_position_embeddings`, causing an AttributeError. Renamed to `fpn_position_embeddings` across\u2026",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43462",
+    "created_at": "2026-01-24T15:16:48Z",
+    "deletions": 65,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43462/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43462",
+    "labels": [],
+    "merged": false,
+    "number": 43462,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(models): Rename fpn_position_encoding to fpn_position_embeddings",
+    "updated_at": "2026-01-26T13:16:57Z"
+  },
+  {
+    "additions": 51,
+    "author": "raimbekovm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Moves `time_step_min`, `time_step_max`, and `time_step_limit` from hardcoded values to configurable parameters in Bamba, FalconH1, and GraniteMoeHybrid models. These parameters were previously hardcoded with a `# FI\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43461",
+    "created_at": "2026-01-24T14:35:04Z",
+    "deletions": 20,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43461/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43461",
+    "labels": [],
+    "merged": true,
+    "number": 43461,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Move hardcoded time_step params to config for Bamba, FalconH1, GraniteMoeHybrid",
+    "updated_at": "2026-01-26T18:29:49Z"
+  },
+  {
+    "additions": 48,
+    "author": "udaymehta",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR addresses the `TODO` placeholders left in `src/transformers/models/llama4/configuration_llama4.py`. The `Llama4VisionConfig` and `Llama4TextConfig` classes contained several \"TODO\" markers in their docstring\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43460",
+    "created_at": "2026-01-24T07:17:10Z",
+    "deletions": 24,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43460/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43460",
+    "labels": [],
+    "merged": true,
+    "number": 43460,
+    "review_comments_count": 19,
+    "state": "closed",
+    "title": "[Docs] Complete missing Llama4 configuration docs",
+    "updated_at": "2026-03-03T04:14:38Z"
+  },
+  {
+    "additions": 20,
+    "author": "josephrocca",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Without this fix, this crashes: ```py from transformers import T5TokenizerFast tok = T5TokenizerFast.from_pretrained(\"t5-small\") tok(\"ok\" + \"\\ud800\" + \"bad\") ``` The error message implies a wrong input type: `TypeError: TextEncodeInput mus\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43459",
+    "created_at": "2026-01-24T04:57:20Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43459/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43459",
+    "labels": [],
+    "merged": false,
+    "number": 43459,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Sanitize invalid UTF\u201116 surrogate codepoints before calling the Rust encode_batch, replacing them with U+FFFD",
+    "updated_at": "2026-01-25T08:10:37Z"
+  },
+  {
+    "additions": 1277,
+    "author": "lashahub",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR adds support for [Music Flamingo](https://huggingface.co/nvidia/music-flamingo-2601-hf) to [Audio Flamingo 3](https://huggingface.co/docs/transformers/en/model_doc/audioflamingo3), NVIDIA's open large audio-language model designed\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43458",
+    "created_at": "2026-01-24T04:02:17Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43458/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43458",
+    "labels": [],
+    "merged": false,
+    "number": 43458,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add Music Flamingo support to Audio Flamingo 3",
+    "updated_at": "2026-04-04T20:28:56Z"
+  },
+  {
+    "additions": 160,
+    "author": "floor-licker",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary It seems like this was a known issue, but the continuous batching implementation packs multiple sequences into a single token stream even though most generation-time logits processors assume `[batch, seq_len]` / `[batch, vocab]`\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43457",
+    "created_at": "2026-01-23T23:14:41Z",
+    "deletions": 31,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43457/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43457",
+    "labels": [],
+    "merged": false,
+    "number": 43457,
+    "review_comments_count": 11,
+    "state": "closed",
+    "title": "fix(continuous-batching): apply logits processors in packed batches",
+    "updated_at": "2026-04-07T08:30:19Z"
+  },
+  {
+    "additions": 18,
+    "author": "sherlock-488",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #13244 ### What does this PR do? Ensure empty-string table cells are mapped to the TAPAS empty token instead of being silently dropped during table tokenization. ### Tests python -m pytest tests/models/tapas/test_tokenization_tapas.p\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43456",
+    "created_at": "2026-01-23T20:57:06Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43456/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43456",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43456,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix TapasTokenizer table tokenization for empty string cells",
+    "updated_at": "2026-01-26T12:57:22Z"
+  },
+  {
+    "additions": 4,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does the PR do? Fixes weight tying between `lm_head` and `embed_tokens` by exposing `tie_word_embeddings` at the top-level config, following the canonical repo pattern. Upon inspecting both presets from the [AyaVision collection](\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43455",
+    "created_at": "2026-01-23T18:36:09Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43455/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43455",
+    "labels": [],
+    "merged": true,
+    "number": 43455,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(models): Add tie_word_embeddings parameter to AyaVisionConfig for proper weight tying",
+    "updated_at": "2026-01-29T18:01:24Z"
+  },
+  {
+    "additions": 44,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes these failing [PhiIntegrationTest](https://github.com/huggingface/transformers/actions/runs/21272911412/job/61226967021#step:14:949), [Glm46VIntegrationTest](https://github.com/huggingface/transformers/actions\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43453",
+    "created_at": "2026-01-23T17:10:04Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43453/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43453",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43453,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix failing tests due to no attribute `pad_token_id`",
+    "updated_at": "2026-01-29T13:57:38Z"
+  },
+  {
+    "additions": 6657,
+    "author": "SangbumChoi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 24,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43451",
+    "created_at": "2026-01-23T14:47:55Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43451/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43451",
+    "labels": [],
+    "merged": false,
+    "number": 43451,
+    "review_comments_count": 116,
+    "state": "open",
+    "title": "Add Molmo2",
+    "updated_at": "2026-04-18T07:09:12Z"
+  },
+  {
+    "additions": 6676,
+    "author": "SangbumChoi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 20,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43448",
+    "created_at": "2026-01-23T14:12:05Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43448/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43448",
+    "labels": [],
+    "merged": false,
+    "number": 43448,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add Molmo",
+    "updated_at": "2026-03-13T10:54:24Z"
+  },
+  {
+    "additions": 1154,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR updates the `can_return_tuple` and ~~`check_model_inputs`~~ `capture_outputs` (see https://github.com/huggingface/transformers/pull/43446#issuecomment-3932796710) typings such that: ```python from transforme\u2026",
+    "changed_files": 241,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43446",
+    "created_at": "2026-01-23T12:58:02Z",
+    "deletions": 775,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43446/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43446",
+    "labels": [],
+    "merged": false,
+    "number": 43446,
+    "review_comments_count": 26,
+    "state": "open",
+    "title": "[`typings`] Automatically type decorator return types as `tuple | X`",
+    "updated_at": "2026-03-02T12:09:01Z"
+  },
+  {
+    "additions": 49,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43445",
+    "created_at": "2026-01-23T12:44:25Z",
+    "deletions": 113,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43445/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43445",
+    "labels": [],
+    "merged": true,
+    "number": 43445,
+    "review_comments_count": 15,
+    "state": "closed",
+    "title": "Fix some MoE routers",
+    "updated_at": "2026-01-27T13:33:14Z"
+  },
+  {
+    "additions": 38,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43441 Pixtral always has packed inputs because inputs images are of different lengths. However position ids was a 1D tensor causing an issue in FA2. Unsqueezi\u2026",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43443",
+    "created_at": "2026-01-23T12:07:28Z",
+    "deletions": 36,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43443/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43443",
+    "labels": [],
+    "merged": true,
+    "number": 43443,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "Fix Pixtral with flash attention",
+    "updated_at": "2026-01-26T08:18:43Z"
+  },
+  {
+    "additions": 12,
+    "author": "merveenoyan",
+    "author_association": "MEMBER",
+    "body_excerpt": "@stevhliu I also need to remove IDEFICS example (too old) and fix image captioning one (dataset was taken down, although it's hard to find an unbroken image captioning dataset), will do on a follow-up PR",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43442",
+    "created_at": "2026-01-23T11:03:23Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43442/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43442",
+    "labels": [],
+    "merged": true,
+    "number": 43442,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add trackio to training notebooks",
+    "updated_at": "2026-01-27T14:43:49Z"
+  },
+  {
+    "additions": 6,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Simply adds some shape comments to clarify more the moe forward implementation",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43439",
+    "created_at": "2026-01-23T10:35:52Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43439/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43439",
+    "labels": [],
+    "merged": false,
+    "number": 43439,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[MoE] add clarifying comments to moe integration",
+    "updated_at": "2026-02-02T09:04:27Z"
+  },
+  {
+    "additions": 22,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43438",
+    "created_at": "2026-01-23T10:33:19Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43438/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43438",
+    "labels": [],
+    "merged": true,
+    "number": 43438,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "batched_mm is slow on cpu",
+    "updated_at": "2026-01-27T13:32:24Z"
+  },
+  {
+    "additions": 138,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? We can see all the failing tests by removing `test_torch_exportable = False` from the code base. There are different reasons for the failures, mostly related to very dynamic processing (looping over the batch dimens\u2026",
+    "changed_files": 54,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43436",
+    "created_at": "2026-01-23T09:54:22Z",
+    "deletions": 58,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43436/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43436",
+    "labels": [],
+    "merged": true,
+    "number": 43436,
+    "review_comments_count": 20,
+    "state": "closed",
+    "title": "More export friendly rewrites and skipping the failing ones",
+    "updated_at": "2026-02-02T16:56:11Z"
+  },
+  {
+    "additions": 87,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43435",
+    "created_at": "2026-01-23T09:49:14Z",
+    "deletions": 36,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43435/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43435",
+    "labels": [],
+    "merged": true,
+    "number": 43435,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "XPU now supports MoE kernel(MegaBlocks) implementation",
+    "updated_at": "2026-02-02T11:12:29Z"
+  },
+  {
+    "additions": 8,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Modify so that if the batch returned from DataCollatorMixin is a dict, it is converted to BatchFeature and output. In most learning and projects, instead of returning the value returned from processing_class perform\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43430",
+    "created_at": "2026-01-23T07:38:32Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43430/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43430",
+    "labels": [],
+    "merged": false,
+    "number": 43430,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Make DataCollatorMixin handle BatchFeature",
+    "updated_at": "2026-01-26T04:56:06Z"
+  },
+  {
+    "additions": 4,
+    "author": "aswin00000",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Problem Ministral-3-3B-Instruct-2512 generates gibberish because `lm_head.weight` is not tied to `embed_tokens.weight`. ## Root Cause The Ministral-3-3B config has `text_config.tie_word_embeddings=True`, and the checkpoint only stores `\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43429",
+    "created_at": "2026-01-23T06:53:54Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43429/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43429",
+    "labels": [],
+    "merged": true,
+    "number": 43429,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix lm_head weight tying in Mistral3ForConditionalGeneration",
+    "updated_at": "2026-01-26T08:19:24Z"
+  },
+  {
+    "additions": 11,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "kernels is not correctly called in benchmark_v2, should register before kernelize, see https://github.com/huggingface/transformers/blob/10e97cd508218546ef681a2c9b4c519ac0d927c3/src/transformers/modeling_utils.py#L3607, use 'use_kernels' in\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43428",
+    "created_at": "2026-01-23T03:32:22Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43428/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43428",
+    "labels": [],
+    "merged": true,
+    "number": 43428,
+    "review_comments_count": 11,
+    "state": "closed",
+    "title": "benchmark-v2: minor fix for benchmark-v2, kernel is not correctly called",
+    "updated_at": "2026-01-31T13:48:54Z"
+  },
+  {
+    "additions": 8,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR fixes bug when `param` is a torch.Tensor, it does not have `get_shape()` method. You can reproduce this bug using: `pytest -rA tests/tensor_parallel/test_tensor_parallel.py::TestTensorParallel2Proc::test_model_dense_forward_compile\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43427",
+    "created_at": "2026-01-23T02:55:01Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43427/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43427",
+    "labels": [],
+    "merged": false,
+    "number": 43427,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Adjust distributed backend in tensor parallel's test file",
+    "updated_at": "2026-04-13T02:40:23Z"
+  },
+  {
+    "additions": 42,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "bring back clean_up_tokenization_spaces to tokenizers backend! for v5 https://github.com/huggingface/transformers/pull/42916 https://github.com/huggingface/transformers/issues/42913 https://github.com/huggingface/transformers/pull/42900 ht\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43426",
+    "created_at": "2026-01-22T22:03:08Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43426/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43426",
+    "labels": [],
+    "merged": true,
+    "number": 43426,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "bring back clean_up_tokenization_spaces to tokenizers backend",
+    "updated_at": "2026-01-26T08:28:02Z"
+  },
+  {
+    "additions": 34,
+    "author": "justinchuby",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Previously the models were not exportable with dynamic shapes due to slices removed in https://github.com/huggingface/transformers/pull/41900. This PR adds a test to ensure executorch exportability with dynamic shap\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43424",
+    "created_at": "2026-01-22T20:56:27Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43424/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43424",
+    "labels": [],
+    "merged": false,
+    "number": 43424,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Add test to ensure executorch exportability with dynamic shapes",
+    "updated_at": "2026-02-19T11:38:16Z"
+  },
+  {
+    "additions": 34,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following fixes are made in this PR: \u2192 Override `__setattr__` in `TokenizersBackend` to automatically update the post-processor when special tokens (`bos_token`, `eos_token`, etc.) are modified at runtime. Thi\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43422",
+    "created_at": "2026-01-22T18:34:36Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43422/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43422",
+    "labels": [],
+    "merged": false,
+    "number": 43422,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "feat(tokenizer): Update post-processor when special tokens are modified in TokenizersBackend",
+    "updated_at": "2026-03-26T06:32:33Z"
+  },
+  {
+    "additions": 364,
+    "author": "Deep-unlearning",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Add audio-text-to-text finetuning guide ### What does this PR do? Adds a new task guide for audio-text-to-text models, covering how to fine-tune Voxtral for audio reasoning tasks using LoRA.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43413",
+    "created_at": "2026-01-22T15:18:01Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43413/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43413",
+    "labels": [],
+    "merged": true,
+    "number": 43413,
+    "review_comments_count": 23,
+    "state": "closed",
+    "title": "audio text to text task guide",
+    "updated_at": "2026-02-17T21:29:44Z"
+  },
+  {
+    "additions": 86,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, currently init weights assumes everything is uniform but these dynamic inits are slightly different",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43411",
+    "created_at": "2026-01-22T14:49:19Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43411/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43411",
+    "labels": [],
+    "merged": true,
+    "number": 43411,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`HunYuan`] Fix RoPE init",
+    "updated_at": "2026-02-02T11:17:30Z"
+  },
+  {
+    "additions": 314,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, not every config needs this mixin - it should be only done for those that have it",
+    "changed_files": 135,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43410",
+    "created_at": "2026-01-22T14:27:34Z",
+    "deletions": 300,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43410/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43410",
+    "labels": [],
+    "merged": true,
+    "number": 43410,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`RoPE`] Make explicit inheritance",
+    "updated_at": "2026-01-27T18:22:16Z"
+  },
+  {
+    "additions": 7,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR fixes failed test case of `pytest -rA tests/models/lighton_ocr/test_modeling_lighton_ocr.py::LightOnOcrForConditionalGenerationModelTest::test_eager_matches_fa2_generate`, in this case `position_ids` is concated to 1D in [position_\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43403",
+    "created_at": "2026-01-22T06:21:37Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43403/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43403",
+    "labels": [],
+    "merged": true,
+    "number": 43403,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "handle 1D position_ids for modeling_flash_attention_utils as well",
+    "updated_at": "2026-01-26T13:42:41Z"
+  },
+  {
+    "additions": 2377,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Splitting off acoustic tokenizer from https://github.com/huggingface/transformers/pull/40546 Such that [VibeVoice ASR](https://huggingface.co/microsoft/VibeVoice-ASR) can be done in a separate / independent PR Model\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 24,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43400",
+    "created_at": "2026-01-22T01:45:26Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43400/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43400",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": true,
+    "number": 43400,
+    "review_comments_count": 28,
+    "state": "closed",
+    "title": "Add VibeVoice Acoustic Tokenizer",
+    "updated_at": "2026-02-06T08:53:01Z"
+  },
+  {
+    "additions": 8,
+    "author": "vaibhav-research",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR fixes an initialization error in Qwen3-VL text model construction when loading checkpoints whose nested text_config does not explicitly define pad_token_id. there are 2 main issues \u2022 Qwen3VLTextModel.__init_\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43398",
+    "created_at": "2026-01-22T00:21:16Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43398/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43398",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 43398,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Fix: adding pad_token_id in Qwen3VLTextConfig",
+    "updated_at": "2026-01-29T13:58:27Z"
+  },
+  {
+    "additions": 409,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Initial `ty` integration. To avoid a gigantic, risky patch, let's start with a baby step where we add the tooling to `make repo-check` and activate it on a subset of the repo. That gives us a human-readable patch, a\u2026",
+    "changed_files": 23,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43396",
+    "created_at": "2026-01-21T16:52:29Z",
+    "deletions": 204,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43396/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43396",
+    "labels": [],
+    "merged": false,
+    "number": 43396,
+    "review_comments_count": 19,
+    "state": "closed",
+    "title": "chore(typing): initial `ty` integration",
+    "updated_at": "2026-02-20T07:40:52Z"
+  },
+  {
+    "additions": 290,
+    "author": "raimbekovm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR fixes incorrect label handling in `Trainer.evaluation_loop` for models that use per-sample nested label structures like `tuple[list[Tensor], list[Tensor]]` (e.g., Mask2Former for instance segmentation). ###\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43395",
+    "created_at": "2026-01-21T16:50:36Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43395/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43395",
+    "labels": [],
+    "merged": false,
+    "number": 43395,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix label truncation for per-sample nested structures in Trainer",
+    "updated_at": "2026-01-30T14:17:54Z"
+  },
+  {
+    "additions": 479,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 111,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43394",
+    "created_at": "2026-01-21T15:33:53Z",
+    "deletions": 267,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43394/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43394",
+    "labels": [],
+    "merged": false,
+    "number": 43394,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "this is AI code, trying to add typing",
+    "updated_at": "2026-01-27T10:53:48Z"
+  },
+  {
+    "additions": 3218,
+    "author": "zRzRzRzRzRzRzR",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Need reivew for @zucchini-nlp",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43391",
+    "created_at": "2026-01-21T13:37:27Z",
+    "deletions": 21,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43391/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43391",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43391,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "[GLM-OCR] GLM-OCR Support",
+    "updated_at": "2026-01-27T13:28:33Z"
+  },
+  {
+    "additions": 1,
+    "author": "KoichiYasuoka",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Bug fix for ImageTextToTextPipeline on bfloat16 models. ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [contributor\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43387",
+    "created_at": "2026-01-21T06:14:39Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43387/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43387",
+    "labels": [],
+    "merged": false,
+    "number": 43387,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "model_inputs should have the same dtype",
+    "updated_at": "2026-02-08T08:56:18Z"
+  },
+  {
+    "additions": 1955,
+    "author": "XingweiDeng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43385",
+    "created_at": "2026-01-21T03:34:41Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43385/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43385",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43385,
+    "review_comments_count": 162,
+    "state": "closed",
+    "title": "[Model] Add UVDoc Model Support",
+    "updated_at": "2026-03-20T22:15:50Z"
+  },
+  {
+    "additions": 164,
+    "author": "ankke",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes: For batched input with multiple images tiled input was mixed between batches.Now, using a util function. Adds: `tie_word_embeddings` to Lfm2VlConfig. Tests for above bug + updated integration test for the newest lfm2vl model. cc: @z\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43379",
+    "created_at": "2026-01-20T18:50:03Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43379/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43379",
+    "labels": [],
+    "merged": true,
+    "number": 43379,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "Fix tiles mixing for batched input, add tie_word_embeddings to LFM2VL config",
+    "updated_at": "2026-01-28T09:32:04Z"
+  },
+  {
+    "additions": 36,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issues were identified and fixed in this PR: \u2192 `MimiModel` incorrectly processed batched inputs with different lengths because the `_encode_frame` method wasn't padding-aware, leading to significant\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43378",
+    "created_at": "2026-01-20T18:38:11Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43378/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43378",
+    "labels": [
+      "Audio"
+    ],
+    "merged": false,
+    "number": 43378,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "feat(models): Make MimiModel encoding padding-aware to ensure batch-to-individual consistency",
+    "updated_at": "2026-04-18T08:38:37Z"
+  },
+  {
+    "additions": 7,
+    "author": "LeonardoEmili",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Seems like the same `fix_mistral_regex` is provided multiple times, replacing `get` with `pop` to avoid running into `KeyError` fixes the issue. ``` File \"/mambaforge/envs/cf/lib/python3.10/site-packages/transformer\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43376",
+    "created_at": "2026-01-20T16:43:19Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43376/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43376",
+    "labels": [],
+    "merged": true,
+    "number": 43376,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix `KeyError` when patching mistral regex",
+    "updated_at": "2026-03-19T08:09:02Z"
+  },
+  {
+    "additions": 23,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fix all the remaining T5 failures. - The test `test_export_t5_summarization` was failing because `eos_token_id` was not being passed to GenerationConfig in `Seq2SeqLMExportableModule`, causing the generate loop to never stop at the end-of-\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43374",
+    "created_at": "2026-01-20T15:42:20Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43374/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43374",
+    "labels": [],
+    "merged": true,
+    "number": 43374,
+    "review_comments_count": 9,
+    "state": "closed",
+    "title": "Fix t5 failures",
+    "updated_at": "2026-02-02T16:43:54Z"
+  },
+  {
+    "additions": 4,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "Trackio now natively supports GPU tracking, see https://github.com/gradio-app/trackio/releases/tag/trackio%400.14.0, so I suggest that we remove it from this callback.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43371",
+    "created_at": "2026-01-20T13:25:51Z",
+    "deletions": 46,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43371/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43371",
+    "labels": [],
+    "merged": true,
+    "number": 43371,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Remove GPU tracking from TrackioCallback and remove env var support",
+    "updated_at": "2026-02-09T10:06:11Z"
+  },
+  {
+    "additions": 141,
+    "author": "Dogacel",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? 1. Allow passing a custom `length_func` to the `DistributedLengthGroupedSampler` to support length grouping complex data. 2. Allow passing `mega_batch_mult` for fine-grained control of internal batching. 3. Add doc-\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43363",
+    "created_at": "2026-01-20T06:53:00Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43363/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43363",
+    "labels": [],
+    "merged": false,
+    "number": 43363,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[Improvement] Update `DistributedLengthGroupedSampler` to allow customizing length function",
+    "updated_at": "2026-03-26T20:21:18Z"
+  },
+  {
+    "additions": 3244,
+    "author": "yanhong-lbh",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for **OLMo Hybrid**, a hybrid architecture model that combines standard transformer attention layers with linear attention (Gated DeltaNet) layers for improved efficiency while maintaining model\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 26,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43358",
+    "created_at": "2026-01-19T22:33:37Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43358/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43358",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43358,
+    "review_comments_count": 237,
+    "state": "closed",
+    "title": "Add OLMo Hybrid model",
+    "updated_at": "2026-02-26T23:01:51Z"
+  },
+  {
+    "additions": 1739,
+    "author": "lmaksym",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Add TDT (Token Duration Transducer) decoder support for Parakeet ASR models. TDT is a transducer-based architecture that jointly predicts tokens and their durations, enabling efficient decoding with accurate word-le\u2026",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43357",
+    "created_at": "2026-01-19T22:09:49Z",
+    "deletions": 15,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43357/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43357",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 43357,
+    "review_comments_count": 24,
+    "state": "closed",
+    "title": "Add Parakeet TDT model support",
+    "updated_at": "2026-02-20T08:49:45Z"
+  },
+  {
+    "additions": 6,
+    "author": "raimbekovm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? The integration tests for `GlmImageForConditionalGeneration` were failing because they load `zai-org/GLM-4.5V` which is a `glm4v_moe` model, not `glm_image`. This causes shape mismatches: ``` lm_head.weight: [151552\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43354",
+    "created_at": "2026-01-19T16:13:20Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43354/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43354",
+    "labels": [],
+    "merged": false,
+    "number": 43354,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Skip glm_image integration tests (wrong checkpoint)",
+    "updated_at": "2026-01-26T15:31:31Z"
+  },
+  {
+    "additions": 1,
+    "author": "readleyj",
+    "author_association": "NONE",
+    "body_excerpt": "The synchronizations are unnecessary and kill performance. Before and after traces <img width=\"1073\" height=\"120\" alt=\"image\" src=\"https://github.com/user-attachments/assets/ccbde7ec-9466-40dd-81a0-8f337100739d\" /> <img width=\"912\" height=\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43349",
+    "created_at": "2026-01-19T09:54:48Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43349/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43349",
+    "labels": [],
+    "merged": false,
+    "number": 43349,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Remove unnecessary device synchronizations from finegrained FP8 matmul",
+    "updated_at": "2026-04-01T20:36:22Z"
+  },
+  {
+    "additions": 1812,
+    "author": "XingweiDeng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 14,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43345",
+    "created_at": "2026-01-19T07:34:26Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43345/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43345",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 43345,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Model]  Add PP-LCNet Model Support",
+    "updated_at": "2026-03-10T04:10:58Z"
+  },
+  {
+    "additions": 4,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "\u2026y::ShieldGemma2IntegrationTest::test_model crash crash calltrack is ``` tests/models/shieldgemma2/test_modeling_shieldgemma2.py:49: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43343",
+    "created_at": "2026-01-19T05:52:23Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43343/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43343",
+    "labels": [],
+    "merged": true,
+    "number": 43343,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix `ShieldGemma2IntegrationTest::test_model`",
+    "updated_at": "2026-01-27T09:40:23Z"
+  },
+  {
+    "additions": 913,
+    "author": "JaredforReal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR adds full batch processing support (batch_size > 1) for the GLM-Image model, fixes padding direction for autoregressive generation, and aligns configuration defaults with the official model. Need to be used\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43342",
+    "created_at": "2026-01-19T05:05:55Z",
+    "deletions": 558,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43342/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43342",
+    "labels": [],
+    "merged": true,
+    "number": 43342,
+    "review_comments_count": 81,
+    "state": "closed",
+    "title": "[GLM-Image] Add batch > 1 support and fix configuration defaults",
+    "updated_at": "2026-01-29T14:39:12Z"
+  },
+  {
+    "additions": 52,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43341",
+    "created_at": "2026-01-19T04:43:07Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43341/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43341",
+    "labels": [],
+    "merged": true,
+    "number": 43341,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "skip some unsupported tests for glm_image model ",
+    "updated_at": "2026-04-13T02:41:04Z"
+  },
+  {
+    "additions": 4305,
+    "author": "gautamvarmadatla",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #42971 This PR adds a **Claude Skill** for the `huggingface/transformers` to help contributors navigate the codebase and common development workflows more efficiently ### What\u2019s included - A repo-specific **C\u2026",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43340",
+    "created_at": "2026-01-19T03:04:30Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43340/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43340",
+    "labels": [],
+    "merged": false,
+    "number": 43340,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Claude code skills for transformers-api",
+    "updated_at": "2026-01-30T13:23:57Z"
+  },
+  {
+    "additions": 19,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh This PR fixes 2 failed test cases for XPU: ``` tests/models/lw_detr/test_modeling_lw_detr.py::LwDetrModelIntegrationTest::test_inference_object_detection_head_tiny tests/models/lw_detr/test_modeling_lw_detr.py::LwDetrModelIntegrat\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43339",
+    "created_at": "2026-01-19T01:38:59Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43339/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43339",
+    "labels": [],
+    "merged": true,
+    "number": 43339,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add xpu expectation for lw_detr model",
+    "updated_at": "2026-01-30T15:03:54Z"
+  },
+  {
+    "additions": 6,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following documentation improvements are made in this PR: \u2192 Added docstring notes to `num_sparse_encoder_layers` and `num_sparse_decoder_layers` parameter `SwitchTransformersConfig` explaining that when set to\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43336",
+    "created_at": "2026-01-17T11:29:20Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43336/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43336",
+    "labels": [],
+    "merged": true,
+    "number": 43336,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: Add Switch Transformers docstring notes and update spectrogram comment",
+    "updated_at": "2026-02-23T10:06:30Z"
+  },
+  {
+    "additions": 1,
+    "author": "Edge-Explorer",
+    "author_association": "NONE",
+    "body_excerpt": "docs: clarify documentation build instruction # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43332",
+    "created_at": "2026-01-17T06:16:36Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43332/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43332",
+    "labels": [],
+    "merged": false,
+    "number": 43332,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Clarify instructions for building documentation locally",
+    "updated_at": "2026-02-02T05:01:09Z"
+  },
+  {
+    "additions": 47,
+    "author": "Siddhartha7340",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "What does this PR do? --------------------- This PR fixes and stabilizes doctests for YOSO model implementations. Summary of changes ----------------------------------- - Fixes the failing doctest for **YosoForMaskedLM.forward** by: - usin\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43331",
+    "created_at": "2026-01-17T05:16:36Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43331/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43331",
+    "labels": [],
+    "merged": false,
+    "number": 43331,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs(yoso): fix and stabilize doctest for YOSO",
+    "updated_at": "2026-03-24T18:18:37Z"
+  },
+  {
+    "additions": 46,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Fix https://github.com/huggingface/transformers/issues/43317. They move the devices without being asked to do it, resulting in OOMs when using with a device_map=\"auto\". Also, no need to upscale the\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43326",
+    "created_at": "2026-01-16T15:02:50Z",
+    "deletions": 33,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43326/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43326",
+    "labels": [],
+    "merged": true,
+    "number": 43326,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix Mxfp4 dequantize",
+    "updated_at": "2026-01-26T12:30:25Z"
+  },
+  {
+    "additions": 140,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "More V5 pipeline cleanup, followup to #43256 and #43306: (changelist updated Jan 26th) - `question-answering` and `visual-question-answering` removed - `image-to-image` removed - Updated the default `text-generation` and `image-text-to-tex\u2026",
+    "changed_files": 116,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 23,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43325",
+    "created_at": "2026-01-16T15:01:51Z",
+    "deletions": 2679,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43325/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43325",
+    "labels": [],
+    "merged": true,
+    "number": 43325,
+    "review_comments_count": 40,
+    "state": "closed",
+    "title": "\ud83d\udea8 More V5 pipeline cleanup",
+    "updated_at": "2026-03-09T14:17:40Z"
+  },
+  {
+    "additions": 3,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh This PR fixes 2 failed test cases for XPU: ``` tests/models/minimax_m2/test_modeling_minimax_m2.py::MiniMaxM2IntegrationTest::test_small_model_logits_batched tests/models/minimax_m2/test_modeling_minimax_m2.py::MiniMaxM2Integratio\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43324",
+    "created_at": "2026-01-16T14:47:41Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43324/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43324",
+    "labels": [],
+    "merged": true,
+    "number": 43324,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "minimax_m2: fix failed test case for XPU",
+    "updated_at": "2026-04-13T02:40:57Z"
+  },
+  {
+    "additions": 1606,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR cleans up backbone utilities. Specifically, we have currently 5 different config attr to decide which backbone to load, most of which can be merged into one and seem redundant After this PR, we'll have only\u2026",
+    "changed_files": 143,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43323",
+    "created_at": "2026-01-16T14:32:44Z",
+    "deletions": 3055,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43323/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43323",
+    "labels": [],
+    "merged": true,
+    "number": 43323,
+    "review_comments_count": 23,
+    "state": "closed",
+    "title": "\ud83d\udea8 Delete duplicate code in backbone utils",
+    "updated_at": "2026-02-04T10:37:57Z"
+  },
+  {
+    "additions": 98,
+    "author": "Tcc0403",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43320",
+    "created_at": "2026-01-16T12:35:08Z",
+    "deletions": 46,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43320/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43320",
+    "labels": [],
+    "merged": true,
+    "number": 43320,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Fix typing hints for different rope parameters per layer type",
+    "updated_at": "2026-01-26T09:12:54Z"
+  },
+  {
+    "additions": 0,
+    "author": "vaibhav-research",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR improves the correctness and robustness of model checkpoint saving by removing a redundant parallelism_config branch from Trainer.save_model(). The removed logic attempted to special-case checkpoint saving w\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43314",
+    "created_at": "2026-01-16T02:43:03Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43314/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43314",
+    "labels": [],
+    "merged": true,
+    "number": 43314,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "43125: Ensure correct checkpoint saving behavior by simplifying Trainer.save_model parallelism logic",
+    "updated_at": "2026-01-24T17:30:06Z"
+  },
+  {
+    "additions": 4,
+    "author": "VedantMadane",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fix the default interpolation method for MobileNet v1 and v2 image processors from BILINEAR to BICUBIC, matching the original timm implementation. ## Motivation As part of #28180, we need to verify that image processor interpola\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43313",
+    "created_at": "2026-01-16T02:33:38Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43313/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43313",
+    "labels": [],
+    "merged": false,
+    "number": 43313,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix MobileNet v1/v2 image processor default interpolation to BICUBIC",
+    "updated_at": "2026-03-26T12:40:52Z"
+  },
+  {
+    "additions": 136,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? adds the cutlass kernel for scaled matmul, the performance is much better than triton for the specific block size : (128, 128): ``` ================================================================================ CO\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43304",
+    "created_at": "2026-01-15T12:36:41Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43304/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43304",
+    "labels": [],
+    "merged": true,
+    "number": 43304,
+    "review_comments_count": 13,
+    "state": "closed",
+    "title": "[Quantization] Add cutlass kernel for FP8",
+    "updated_at": "2026-01-28T10:44:34Z"
+  },
+  {
+    "additions": 256,
+    "author": "kashif",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This pull request introduces enhancements to the DeepSpeed integration for model loading, specifically improving how weight conversions (such as renaming and merging/splitting of weights) are handled when loading st\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43303",
+    "created_at": "2026-01-15T11:43:12Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43303/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43303",
+    "labels": [],
+    "merged": true,
+    "number": 43303,
+    "review_comments_count": 14,
+    "state": "closed",
+    "title": "[DeepSpeed] add weight_mapping to _load_state_dict_into_zero3_model",
+    "updated_at": "2026-01-24T17:16:44Z"
+  },
+  {
+    "additions": 78,
+    "author": "ZLkanyo009",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "By tokenizing only a single <image_pad> or <video_pad> into the input_ids and inserting them in the form of n * grid_thw at the end, the tokenization of Qwen3VL is accelerated. In SGLang, we discovered that the tokenization process for the\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43297",
+    "created_at": "2026-01-15T03:17:45Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43297/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43297",
+    "labels": [],
+    "merged": false,
+    "number": 43297,
+    "review_comments_count": 7,
+    "state": "open",
+    "title": "[Feat] Reduces redundant tokenization of <pad> tags to accelerate Qwen3VL.",
+    "updated_at": "2026-03-18T06:57:15Z"
+  },
+  {
+    "additions": 31,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43291",
+    "created_at": "2026-01-14T17:28:57Z",
+    "deletions": 28,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43291/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43291",
+    "labels": [],
+    "merged": false,
+    "number": 43291,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix whisper tests",
+    "updated_at": "2026-01-26T09:36:42Z"
+  },
+  {
+    "additions": 10,
+    "author": "ansh-info",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR fixes a crash in `AutoTokenizer.from_pretrained` when `AutoConfig` fails and the code falls back to `PreTrainedConfig`. The fallback path incorrectly calls `.get()` on a config object, which raises `Attribut\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43289",
+    "created_at": "2026-01-14T15:51:58Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43289/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43289",
+    "labels": [],
+    "merged": false,
+    "number": 43289,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix:auto tokenizer pretrainedconfig",
+    "updated_at": "2026-01-26T13:18:38Z"
+  },
+  {
+    "additions": 6,
+    "author": "gau-nernst",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Currently, to adjust the speed of generated audio with VitsModel, I have to modify the `.speaking_rate` attribute ```python from transformers import VitsModel, AutoTokenizer import torch from IPython.display import\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43283",
+    "created_at": "2026-01-14T10:59:47Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43283/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43283",
+    "labels": [],
+    "merged": true,
+    "number": 43283,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[VITS] Add `speaking_rate` as an optionl forward argument",
+    "updated_at": "2026-03-03T06:11:16Z"
+  },
+  {
+    "additions": 226,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, less custom code and more standardization. There are still a few models with completely custom `prepare_inputs_for_generation`, I can't make those models happy with `super().prepare_inputs_for_generati\u2026",
+    "changed_files": 25,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43280",
+    "created_at": "2026-01-14T09:14:23Z",
+    "deletions": 740,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43280/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43280",
+    "labels": [],
+    "merged": true,
+    "number": 43280,
+    "review_comments_count": 28,
+    "state": "closed",
+    "title": "Prepare inputs for generation is called from `super()`",
+    "updated_at": "2026-01-26T17:51:13Z"
+  },
+  {
+    "additions": 2650,
+    "author": "XingweiDeng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 31,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43274",
+    "created_at": "2026-01-14T04:29:07Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43274/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43274",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43274,
+    "review_comments_count": 202,
+    "state": "closed",
+    "title": "[Model] Add PP-OCRV5_server_det Model Support",
+    "updated_at": "2026-03-12T18:41:29Z"
+  },
+  {
+    "additions": 11,
+    "author": "theonlypal",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Use processor's `feature_extractor.sampling_rate` as default when loading audio in `apply_chat_template()` - Fall back to 16kHz only when no feature_extractor is available ## Problem `apply_chat_template()` hardcoded 16kHz as\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43273",
+    "created_at": "2026-01-14T03:56:14Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43273/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43273",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43273,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "fix(processing): use feature_extractor.sampling_rate as default in apply_chat_template",
+    "updated_at": "2026-02-02T12:15:38Z"
+  },
+  {
+    "additions": 3713,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds Omnilingual ASR: https://github.com/facebookresearch/omnilingual-asr CTC-variant - [x] functional conversion to checkpoint that is (more) Transformers-compatible: https://huggingface.co/bezzam/omniasr-ctc-300m-\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43265",
+    "created_at": "2026-01-13T20:02:54Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43265/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43265",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 43265,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Adding Omnilingual ASR models",
+    "updated_at": "2026-04-13T07:52:07Z"
+  },
+  {
+    "additions": 757,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes #42491 This should serve as an example of how the weight loader can be re-used in other project. The content is probably gonna be upstreamed to peft! Current status: <img width=\"963\" height=\"396\" alt=\"image\" s\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43261",
+    "created_at": "2026-01-13T17:55:23Z",
+    "deletions": 303,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43261/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43261",
+    "labels": [],
+    "merged": true,
+    "number": 43261,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix PEFT x MoEs",
+    "updated_at": "2026-01-24T10:06:54Z"
+  },
+  {
+    "additions": 5,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "This PR allows some specific kwargs to be passed through to `fixed_cross_entropy`. Fixes #43240 cc @arthurzucker because I think you wrote the original?",
+    "changed_files": 1,
+    "cluster_id": "cluster-43240-3",
+    "cluster_ids": [
+      "cluster-43240-3"
+    ],
+    "cluster_role": "member",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43254",
+    "created_at": "2026-01-13T13:52:59Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43254/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43254",
+    "labels": [],
+    "merged": false,
+    "number": 43254,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add supported kwargs to fixed_cross_entropy",
+    "updated_at": "2026-01-29T19:00:04Z"
+  },
+  {
+    "additions": 13,
+    "author": "jasiecky",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? The problem to be solved is the issue https://github.com/huggingface/transformers/issues/43240. This PR implements passing weight and label_smoothing parameters of nn.functional.cross_entropy in fixed_cross_entropy\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43240-3",
+    "cluster_ids": [
+      "cluster-43240-3"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43251",
+    "created_at": "2026-01-13T11:38:16Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43251/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43251",
+    "labels": [],
+    "merged": false,
+    "number": 43251,
+    "review_comments_count": 5,
+    "state": "open",
+    "title": "Fix(43240): pass kwargs to nn.functional.cross_entropy",
+    "updated_at": "2026-02-02T08:46:34Z"
+  },
+  {
+    "additions": 3931,
+    "author": "XingweiDeng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 33,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 28,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43247",
+    "created_at": "2026-01-13T08:58:19Z",
+    "deletions": 14,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43247/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43247",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43247,
+    "review_comments_count": 276,
+    "state": "closed",
+    "title": "[Model] Add PP-OCRV5_mobile_det Model Support ",
+    "updated_at": "2026-03-14T07:19:32Z"
+  },
+  {
+    "additions": 5,
+    "author": "sasankkurnella",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #43240 The `fixed_cross_entropy` function accepts `**kwargs` but wasn't using them properly. This meant parameters like `label_smoothing` were silently ignored. **The fix:** Filter and pass only valid `cross_\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43242",
+    "created_at": "2026-01-13T03:17:11Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43242/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43242",
+    "labels": [],
+    "merged": false,
+    "number": 43242,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: pass kwargs to cross_entropy in fixed_cross_entropy",
+    "updated_at": "2026-03-25T10:55:13Z"
+  },
+  {
+    "additions": 230,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "adds ecosystem integration docs for deploying with Candle, ExecuTorch, and MLX",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43241",
+    "created_at": "2026-01-13T02:07:00Z",
+    "deletions": 24,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43241/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43241",
+    "labels": [],
+    "merged": true,
+    "number": 43241,
+    "review_comments_count": 13,
+    "state": "closed",
+    "title": "[docs] deploying",
+    "updated_at": "2026-02-05T16:36:52Z"
+  },
+  {
+    "additions": 148,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "simplify extra tokens logic in base some light refactoring, hopefully a bit more readable",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43230",
+    "created_at": "2026-01-12T11:08:35Z",
+    "deletions": 217,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43230/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43230",
+    "labels": [],
+    "merged": true,
+    "number": 43230,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "simplify extra tokens logic in base",
+    "updated_at": "2026-01-29T15:48:42Z"
+  },
+  {
+    "additions": 80,
+    "author": "adi776borate",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43222",
+    "created_at": "2026-01-11T18:25:11Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43222/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43222",
+    "labels": [],
+    "merged": false,
+    "number": 43222,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Fix] Special token encoding in MistralCommonBackend",
+    "updated_at": "2026-01-30T09:06:29Z"
+  },
+  {
+    "additions": 40,
+    "author": "GaspTO",
+    "author_association": "NONE",
+    "body_excerpt": "Add the changes proposed in the issue #43215 (The issue has not been approved yet, but I already had these changes, so it made sense to just open a PR to not lose this).",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43220",
+    "created_at": "2026-01-11T12:33:18Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43220/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43220",
+    "labels": [],
+    "merged": false,
+    "number": 43220,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add output_attentions to dinov3",
+    "updated_at": "2026-04-05T17:51:54Z"
+  },
+  {
+    "additions": 8,
+    "author": "engmohamedsalah",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #42890 SAM-HQ integration tests were failing due to non-deterministic positional embeddings. The `positional_embedding` parameter in `SamHQPositionalEmbedding` is: 1. Randomly initialized using `torch.randn()` (line 418 in\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43211",
+    "created_at": "2026-01-10T13:15:19Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43211/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43211",
+    "labels": [],
+    "merged": false,
+    "number": 43211,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix flaky SAM-HQ integration tests",
+    "updated_at": "2026-01-24T15:31:17Z"
+  },
+  {
+    "additions": 25,
+    "author": "Anri-Lombard",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes three bugs in the xLSTM implementation that prevent training models smaller than 7B parameters. Fixes #43208 ## Changes 1. **Line 235**: Fixed typo where tensor was called as function instead of using `.reshap\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43209",
+    "created_at": "2026-01-10T06:59:26Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43209/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43209",
+    "labels": [],
+    "merged": true,
+    "number": 43209,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "[xLSTM] Fix bugs preventing small model training",
+    "updated_at": "2026-02-09T18:20:37Z"
+  },
+  {
+    "additions": 22,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? https://gtihub.com/databricks/dbrx-instruct and https://huggingface.co/databricks/dbrx-instruct are now closed (do you know why?) It implies some updates in the doc to avoid dead links. I suggest that we re-upload t\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43196",
+    "created_at": "2026-01-09T14:55:31Z",
+    "deletions": 20,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43196/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43196",
+    "labels": [],
+    "merged": true,
+    "number": 43196,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update DBRX docs to reference re-uploaded checkpoint",
+    "updated_at": "2026-02-02T15:46:52Z"
+  },
+  {
+    "additions": 47,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title! Most important change is the removal of the 2 deprecated auto classes",
+    "changed_files": 65,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43195",
+    "created_at": "2026-01-09T14:53:58Z",
+    "deletions": 1025,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43195/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43195",
+    "labels": [],
+    "merged": true,
+    "number": 43195,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Remove more deprecated objects/args",
+    "updated_at": "2026-02-03T09:54:17Z"
+  },
+  {
+    "additions": 20,
+    "author": "raimbekovm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #43186 ## What does this PR do? Fixes floating-point precision issue in `JanusImageProcessor.resize()` where `int()` truncation caused incorrect output dimensions. **Problem:** ```python height = 2522, size = 384 delta = 384 / 2522 =\u2026",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43187",
+    "created_at": "2026-01-09T09:18:09Z",
+    "deletions": 20,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43187/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43187",
+    "labels": [],
+    "merged": true,
+    "number": 43187,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "\ud83d\udea8Fix floating-point precision in JanusImageProcessor resize",
+    "updated_at": "2026-02-02T18:47:34Z"
+  },
+  {
+    "additions": 347,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "This PR fixes a large portion of the failing pipeline tests on AMD.",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43178",
+    "created_at": "2026-01-08T18:13:10Z",
+    "deletions": 143,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43178/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43178",
+    "labels": [],
+    "merged": true,
+    "number": 43178,
+    "review_comments_count": 44,
+    "state": "closed",
+    "title": "[CI][AMD] Fix Pipeline CI ",
+    "updated_at": "2026-01-29T10:37:32Z"
+  },
+  {
+    "additions": 6,
+    "author": "prashantpandeygit",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR fixes cache reordering causing runtime error when keys and values are on different devices While working with beam search, with the cache operations, i got this error attached <img width=\"1075\" height=\"145\"\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43177",
+    "created_at": "2026-01-08T17:39:27Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43177/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43177",
+    "labels": [],
+    "merged": false,
+    "number": 43177,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "make devices consistent in cache reordering",
+    "updated_at": "2026-02-17T08:25:36Z"
+  },
+  {
+    "additions": 0,
+    "author": "lashahub",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR adds support for **Music Flamingo**, NVIDIA's open large audio-language model designed for deep music understanding and reasoning. - **Paper**: [Music Flamingo: Scaling Music Understanding in Audio Language Models](https://huggingf\u2026",
+    "changed_files": 0,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43174",
+    "created_at": "2026-01-08T16:47:50Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43174/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43174",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 43174,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Add Music Flamingo",
+    "updated_at": "2026-01-24T03:20:30Z"
+  },
+  {
+    "additions": 1,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43173",
+    "created_at": "2026-01-08T16:16:43Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43173/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43173",
+    "labels": [],
+    "merged": false,
+    "number": 43173,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`Timm`] Increase tol in flaky test",
+    "updated_at": "2026-03-14T20:00:03Z"
+  },
+  {
+    "additions": 1434,
+    "author": "LuJunru",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 24,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43166",
+    "created_at": "2026-01-08T10:02:59Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43166/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43166",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43166,
+    "review_comments_count": 83,
+    "state": "closed",
+    "title": "Add Youtu-LLM model",
+    "updated_at": "2026-04-05T16:53:59Z"
+  },
+  {
+    "additions": 11,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Under the current strategy, the memory allocated to device 0 is too small, which may cause the model to skip it entirely. We should ensure device 0 has enough capacity to accommodate at least the largest layer of the model, guaranteeing th\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43162",
+    "created_at": "2026-01-08T06:23:02Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43162/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43162",
+    "labels": [],
+    "merged": false,
+    "number": 43162,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix device 0 memory allocate",
+    "updated_at": "2026-04-20T02:29:52Z"
+  },
+  {
+    "additions": 5,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, some models still fluctuate keeping an eye out for a week to see if more models need to be added - e.g. qwen 3 omni moe was not caught in the initial nightly ci but the 2nd day Keeping this as a draft to wait for potentially\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43152",
+    "created_at": "2026-01-07T16:23:22Z",
+    "deletions": 4,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43152/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43152",
+    "labels": [],
+    "merged": false,
+    "number": 43152,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`Moe`] Pin more models to eager",
+    "updated_at": "2026-03-14T20:00:07Z"
+  },
+  {
+    "additions": 129,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes mxfp4 quantization by using a reverse_op during saving",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43148",
+    "created_at": "2026-01-07T14:54:45Z",
+    "deletions": 64,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43148/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43148",
+    "labels": [],
+    "merged": true,
+    "number": 43148,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Quantization] Fixing mxfp4 saving using reverse_op",
+    "updated_at": "2026-02-27T13:28:53Z"
+  },
+  {
+    "additions": 9,
+    "author": "adi776borate",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43141",
+    "created_at": "2026-01-07T06:36:16Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43141/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43141",
+    "labels": [],
+    "merged": true,
+    "number": 43141,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Fix] Add missing init_kwargs to MistralCommonBackend for v5 processor compatibility",
+    "updated_at": "2026-01-30T09:06:27Z"
+  },
+  {
+    "additions": 42,
+    "author": "jiosephlee",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds ability to control the sampling strategy of data during training, unifying `group_by_length` with the default random sampling, and additionally adds sequential sampling. Following up on previous PR #42265 @SunM\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43138",
+    "created_at": "2026-01-06T23:10:55Z",
+    "deletions": 32,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43138/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43138",
+    "labels": [],
+    "merged": true,
+    "number": 43138,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Train sampler unification",
+    "updated_at": "2026-02-15T23:57:55Z"
+  },
+  {
+    "additions": 23,
+    "author": "sumukhacharya03",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "Fixes incorrect multi-label prediction thresholding that caused empty predictions in predict_results.txt for multi-label classification.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43135",
+    "created_at": "2026-01-06T16:52:09Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43135/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43135",
+    "labels": [],
+    "merged": false,
+    "number": 43135,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix multi-label prediction thresholding in run_classification",
+    "updated_at": "2026-03-10T07:15:02Z"
+  },
+  {
+    "additions": 911,
+    "author": "ChiaraBoretti",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? **Summary** This PR introduces an integration of the [SINQ](https://huggingface.co/papers/2509.22944) quantization method into the Hugging Face Transformers library. It follows the pattern of existing quantization i\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43112",
+    "created_at": "2026-01-05T14:35:44Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43112/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43112",
+    "labels": [],
+    "merged": true,
+    "number": 43112,
+    "review_comments_count": 113,
+    "state": "closed",
+    "title": "SINQ quantization strategy integration (adapted for Transformers V5)",
+    "updated_at": "2026-02-16T15:08:42Z"
+  },
+  {
+    "additions": 300,
+    "author": "vaibhav-research",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR makes SigLIP2 text preprocessing explicit and consistent with how the model was trained. It introduces a model specific tokenizer (Siglip2Tokenizer) that wraps the existing GemmaTokenizer while enforcing Sig\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43101",
+    "created_at": "2026-01-04T22:59:05Z",
+    "deletions": 15,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43101/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43101",
+    "labels": [],
+    "merged": true,
+    "number": 43101,
+    "review_comments_count": 51,
+    "state": "closed",
+    "title": "Add Siglip2Tokenizer to enforce training-time text preprocessing defaults",
+    "updated_at": "2026-04-06T23:55:44Z"
+  },
+  {
+    "additions": 5285,
+    "author": "zhang-prog",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 14,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43098",
+    "created_at": "2026-01-04T12:10:22Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43098/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43098",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43098,
+    "review_comments_count": 178,
+    "state": "closed",
+    "title": "[Model] Add PP-DocLayoutV3 Model Support",
+    "updated_at": "2026-01-29T09:56:11Z"
+  },
+  {
+    "additions": 519,
+    "author": "AmitMY",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - Adds `async_stopping_criteria` flag to `GenerationConfig` that enables asynchronous stopping criteria checks during generation - When enabled, stopping criteria are evaluated on a separate CUDA stream, allowing generation to c\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43085",
+    "created_at": "2026-01-03T09:42:33Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43085/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43085",
+    "labels": [],
+    "merged": false,
+    "number": 43085,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Add async_stopping_criteria flag to reduce GPU-CPU syncs during generation",
+    "updated_at": "2026-02-09T17:16:29Z"
+  },
+  {
+    "additions": 1821,
+    "author": "nuxlear",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 22,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43080",
+    "created_at": "2026-01-02T07:29:46Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43080/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43080",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43080,
+    "review_comments_count": 54,
+    "state": "closed",
+    "title": "Add EXAONE-MoE implementations",
+    "updated_at": "2026-02-03T17:27:36Z"
+  },
+  {
+    "additions": 12,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43076",
+    "created_at": "2026-01-01T07:48:34Z",
+    "deletions": 13,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43076/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43076",
+    "labels": [],
+    "merged": true,
+    "number": 43076,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix markdown documentation",
+    "updated_at": "2026-02-07T02:25:20Z"
+  },
+  {
+    "additions": 4,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "This allows the Transformers modeling backend for vLLM to load Mixtral with Transformers v5 because it currently (once https://github.com/vllm-project/vllm/pull/31545 is merged) only supports `WeightRenaming` but not `WeightConverter`",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43071",
+    "created_at": "2025-12-30T21:24:24Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43071/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43071",
+    "labels": [],
+    "merged": false,
+    "number": 43071,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Separate renaming from merging in Mixtral conversion mapping",
+    "updated_at": "2026-01-30T12:00:55Z"
+  },
+  {
+    "additions": 1755,
+    "author": "ed22699",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR internalises the NomicBERT model, following the basic structure of the https://huggingface.co/nomic-ai/nomic-bert-2048 Fixes #42738 ## Problem BERT-like models using RoPE are currently not internalized in ou\u2026",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 77,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43067",
+    "created_at": "2025-12-29T18:31:22Z",
+    "deletions": 28,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43067/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43067",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43067,
+    "review_comments_count": 255,
+    "state": "closed",
+    "title": "Internalise the NomicBERT model",
+    "updated_at": "2026-04-02T14:23:32Z"
+  },
+  {
+    "additions": 36,
+    "author": "majiayu000",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Add test coverage for batched inference in the ObjectDetectionPipeline: - Tests that using `batch_size` parameter with DataLoader-based batching correctly returns results for all input images - Verifies that each image in the ba\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43057",
+    "created_at": "2025-12-27T17:25:31Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43057/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43057",
+    "labels": [],
+    "merged": false,
+    "number": 43057,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "test: add batched inference test for ObjectDetectionPipeline",
+    "updated_at": "2026-04-10T13:14:58Z"
+  },
+  {
+    "additions": 1,
+    "author": "Olexandr88",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Avoid potential KeyError when accessing _file_ in CLIP lazy module.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43050",
+    "created_at": "2025-12-26T13:28:31Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43050/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43050",
+    "labels": [],
+    "merged": false,
+    "number": 43050,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix safe _file_ access in CLIP lazy module",
+    "updated_at": "2026-01-28T06:32:36Z"
+  },
+  {
+    "additions": 6,
+    "author": "CodersAcademy006",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR resolves the model parallelism crash in `PeVideo` and `PeAudioVideo` models by adding `_no_split_modules = [\"TimmWrapperForImageClassification\"]` to their configuration. Currently, `accelerate` naively splits the `timm`-based visio\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43047",
+    "created_at": "2025-12-26T08:58:03Z",
+    "deletions": 16,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43047/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43047",
+    "labels": [],
+    "merged": true,
+    "number": 43047,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: prevent accelerate from splitting vision encoder by setting _no_\u2026",
+    "updated_at": "2026-04-14T13:36:03Z"
+  },
+  {
+    "additions": 874,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR: 1. Refactors `ModernBERT's Flash Attention` implementation using the built-in interfaces from `Transformers`. 2. Fixes some UT issues in the `tests/models/modernbert/test_modeling_modernbert.py` and `tests/\u2026",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 40,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43030",
+    "created_at": "2025-12-24T11:54:53Z",
+    "deletions": 2021,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43030/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43030",
+    "labels": [],
+    "merged": true,
+    "number": 43030,
+    "review_comments_count": 89,
+    "state": "closed",
+    "title": "[Model] Refactor modernbert with the attention interface",
+    "updated_at": "2026-01-29T14:21:11Z"
+  },
+  {
+    "additions": 1172,
+    "author": "Aznix07",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR adds support for the **MiMo-V2-Flash** architecture from Xiaomi (reference: [XiaomiMiMo/MiMo-V2-Flash](https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash)). MiMo-V2-Flash is a large-scale Mixture-of-Experts (M\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43020",
+    "created_at": "2025-12-23T13:02:30Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43020/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43020",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 43020,
+    "review_comments_count": 24,
+    "state": "open",
+    "title": "Add mimo v2 flash",
+    "updated_at": "2026-03-03T09:16:22Z"
+  },
+  {
+    "additions": 5148,
+    "author": "zhang-prog",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds PP-DocLayoutV2 model to Hugging Face Transformers from PaddleOCR. Relevant Links: [PaddleOCR](https://www.paddleocr.ai/latest/index.html) https://huggingface.co/PaddlePaddle/PP-DocLayoutV2_safetensors #\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 35,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43018",
+    "created_at": "2025-12-23T11:18:14Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43018/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43018",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43018,
+    "review_comments_count": 151,
+    "state": "closed",
+    "title": "[Model] Add PP-DocLayoutV2 Model Support",
+    "updated_at": "2026-02-27T09:29:00Z"
+  },
+  {
+    "additions": 114,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42993",
+    "created_at": "2025-12-22T07:29:55Z",
+    "deletions": 17,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42993/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42993",
+    "labels": [],
+    "merged": false,
+    "number": 42993,
+    "review_comments_count": 11,
+    "state": "closed",
+    "title": "add Intel XPU platform support for benchmark_v2",
+    "updated_at": "2026-04-13T02:40:55Z"
+  },
+  {
+    "additions": 3,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Update doc for CPU torchao int4 weight-only usage as api changed.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42992",
+    "created_at": "2025-12-22T07:12:27Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42992/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42992",
+    "labels": [],
+    "merged": true,
+    "number": 42992,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update cpu torchao usage",
+    "updated_at": "2026-04-20T02:29:50Z"
+  },
+  {
+    "additions": 1,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? It is replaced by `AnnotationFormat`.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42983",
+    "created_at": "2025-12-22T01:38:53Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42983/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42983",
+    "labels": [],
+    "merged": true,
+    "number": 42983,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "\ud83d\udea8 Remove deprecated AnnotionFormat",
+    "updated_at": "2026-01-31T01:17:44Z"
+  },
+  {
+    "additions": 3453,
+    "author": "sbucaille",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #42977 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingface/\u2026",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42978",
+    "created_at": "2025-12-21T05:34:27Z",
+    "deletions": 1305,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42978/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42978",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 42978,
+    "review_comments_count": 14,
+    "state": "open",
+    "title": "Add ViT NEPA",
+    "updated_at": "2026-02-11T13:58:35Z"
+  },
+  {
+    "additions": 23,
+    "author": "salmanmkc",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR upgrades GitHub Actions to their latest versions for Node.js 24 compatibility and security updates. ## Changes | Action | Old Version(s) | New Version | Files | |--------|---------------|-------------|-------| | conda-in\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42976",
+    "created_at": "2025-12-20T23:38:07Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42976/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42976",
+    "labels": [],
+    "merged": false,
+    "number": 42976,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Upgrade GitHub Actions to latest versions",
+    "updated_at": "2026-01-25T00:07:18Z"
+  },
+  {
+    "additions": 228,
+    "author": "salmanmkc",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR upgrades GitHub Actions to their latest versions for Node.js 24 compatibility and security updates. ## Changes | Action | Old Version(s) | New Version | Files | |--------|---------------|-------------|-------| | actions/\u2026",
+    "changed_files": 33,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42975",
+    "created_at": "2025-12-20T23:37:58Z",
+    "deletions": 228,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42975/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42975",
+    "labels": [],
+    "merged": false,
+    "number": 42975,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Upgrade GitHub Actions for Node 24 compatibility",
+    "updated_at": "2026-01-25T00:07:56Z"
+  },
+  {
+    "additions": 680,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "EDIT: Ready for review! This is a base test and tester class for VLMs, similar to the existing [Causal LM tests](https://github.com/huggingface/transformers/pull/37590). The main difficulty here is that VLMs are more variable than pure LLM\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42964",
+    "created_at": "2025-12-19T17:42:51Z",
+    "deletions": 832,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42964/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42964",
+    "labels": [],
+    "merged": true,
+    "number": 42964,
+    "review_comments_count": 65,
+    "state": "closed",
+    "title": "Add shared VLM tests",
+    "updated_at": "2026-03-11T15:29:30Z"
+  },
+  {
+    "additions": 493,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Follow-up to https://github.com/huggingface/transformers/pull/42309 to really leverage meta device loading. Gives crazy speedups for loading some models, e.g. *about 2.5x on gpt-oss 20b* and *about 3x on the 120b ve\u2026",
+    "changed_files": 55,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42941",
+    "created_at": "2025-12-18T11:17:42Z",
+    "deletions": 353,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42941/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42941",
+    "labels": [],
+    "merged": true,
+    "number": 42941,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "[loading] Really initialize on meta device for huge perf gains",
+    "updated_at": "2026-02-03T15:54:50Z"
+  },
+  {
+    "additions": 62,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "\u2026onTest pass in cuda and xpu @Rocketknight1",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42938",
+    "created_at": "2025-12-18T06:36:52Z",
+    "deletions": 74,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42938/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42938",
+    "labels": [],
+    "merged": false,
+    "number": 42938,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "make tests/models/sam_hq/test_modeling_sam_hq.py::SamHQModelIntegrati\u2026",
+    "updated_at": "2026-01-27T01:19:44Z"
+  },
+  {
+    "additions": 8,
+    "author": "casinca",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42931",
+    "created_at": "2025-12-17T17:27:59Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42931/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42931",
+    "labels": [],
+    "merged": true,
+    "number": 42931,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: Squared ReLU paper fix",
+    "updated_at": "2026-02-11T12:35:53Z"
+  },
+  {
+    "additions": 28,
+    "author": "KyleMylonakisProtopia",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? GPT OSS Models have a conversion script which allows transforming from the distributed quantized weights to a Hugging Face model in BFloat16. This also converts aspects of the tokenizer. As a side note, it also fixe\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42901",
+    "created_at": "2025-12-16T12:41:13Z",
+    "deletions": 42,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42901/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42901",
+    "labels": [],
+    "merged": true,
+    "number": 42901,
+    "review_comments_count": 13,
+    "state": "closed",
+    "title": "fix: GPT OSS Conversion Script Enhancements",
+    "updated_at": "2026-02-18T14:30:17Z"
+  },
+  {
+    "additions": 18,
+    "author": "kitaekatt",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Add `attn_logit_softcapping` extraction to GGUF config mapping for Gemma2 and Gemma3 architectures. ## Problem When loading Gemma2/Gemma3 GGUF models, the `attn_logit_softcapping` parameter is not extracted from GGUF metadata. T\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42881",
+    "created_at": "2025-12-15T16:57:53Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/42881/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42881",
+    "labels": [],
+    "merged": false,
+    "number": 42881,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[GGUF] Add attn_logit_softcapping to Gemma2/Gemma3 config mapping",
+    "updated_at": "2026-02-04T20:34:41Z"
+  },
+  {
+    "additions": 57,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "CPU can also use paged cache with eager or sdpa: `python continuous_batching_simple.py --attn sdpa` Without this change, the previous command error would be like: ``` Error in generation loop: unsupported operand type(s) for -: 'NoneType'\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 20,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42869",
+    "created_at": "2025-12-15T08:43:57Z",
+    "deletions": 29,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42869/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42869",
+    "labels": [],
+    "merged": true,
+    "number": 42869,
+    "review_comments_count": 18,
+    "state": "closed",
+    "title": "enable cpu paged cache",
+    "updated_at": "2026-04-20T02:29:35Z"
+  },
+  {
+    "additions": 15,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR: 1. Remove ipex/ccl in CPU training doc 2. Fix bf16 check: CPU does not need torch_xla if use bf16.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42866",
+    "created_at": "2025-12-15T03:09:19Z",
+    "deletions": 53,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42866/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42866",
+    "labels": [],
+    "merged": true,
+    "number": 42866,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Remove ipex/ccl in CPU training doc",
+    "updated_at": "2026-04-20T02:29:49Z"
+  },
+  {
+    "additions": 385,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "splits the serving docs from #42263",
+    "changed_files": 13,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42853",
+    "created_at": "2025-12-12T22:44:08Z",
+    "deletions": 247,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42853/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42853",
+    "labels": [],
+    "merged": true,
+    "number": 42853,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[docs] serving",
+    "updated_at": "2026-01-30T17:45:26Z"
+  },
+  {
+    "additions": 32,
+    "author": "yao-matrix",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "remove ipex and ccl for xpu and cpu, since all ipex optimizations are upstreamed to PyTorch or kernels-community; ccl for xpu is upstreamed to PyTorch built-in xccl backend; ccl for cpu will use gloo",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42852",
+    "created_at": "2025-12-12T22:18:43Z",
+    "deletions": 126,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42852/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42852",
+    "labels": [],
+    "merged": true,
+    "number": 42852,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "remove ipex and ccl for xpu and cpu",
+    "updated_at": "2026-02-03T15:24:36Z"
+  },
+  {
+    "additions": 1482,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title ~",
+    "changed_files": 117,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 52,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42848",
+    "created_at": "2025-12-12T19:11:45Z",
+    "deletions": 7100,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42848/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42848",
+    "labels": [],
+    "merged": true,
+    "number": 42848,
+    "review_comments_count": 39,
+    "state": "closed",
+    "title": ":rotating_light: [`Attn`] New attn mask interface everywhere",
+    "updated_at": "2026-02-09T15:44:55Z"
+  },
+  {
+    "additions": 1548,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Removes `requests` code from setup, src and utils files. Fixes #42817 partially - [x] **Quality checks**: `make fixup` passes with no errors cc @CoderTCY @Wauplin @Rocketknight1 notes - 1. I've removed `requests` fr\u2026",
+    "changed_files": 256,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42845",
+    "created_at": "2025-12-12T17:57:18Z",
+    "deletions": 848,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42845/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42845",
+    "labels": [],
+    "merged": true,
+    "number": 42845,
+    "review_comments_count": 17,
+    "state": "closed",
+    "title": "Remove `requests` code",
+    "updated_at": "2026-02-08T11:11:15Z"
+  },
+  {
+    "additions": 1,
+    "author": "jackzhxng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Pass in `position_ids` so that the custom attention interface implementations have access to it. ## Who can review? @ArthurZucker @Cyrilvallez",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42842",
+    "created_at": "2025-12-12T16:03:48Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42842/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42842",
+    "labels": [],
+    "merged": false,
+    "number": 42842,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Pass in position_ids into attention for GPT2",
+    "updated_at": "2026-02-09T16:49:19Z"
+  },
+  {
+    "additions": 35,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fix #42792",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42830",
+    "created_at": "2025-12-12T05:30:43Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42830/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42830",
+    "labels": [],
+    "merged": true,
+    "number": 42830,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Fix speccht5_tts pipeline",
+    "updated_at": "2026-04-20T02:29:49Z"
+  },
+  {
+    "additions": 81,
+    "author": "Blaizzy",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds MLX tensor support to `BatchFeature` and removes the PyTorch-only restriction in fast image processing utilities. **Changes:** - Added `is_mlx_array` export to `transformers/utils/__init__.py` - Implemented MLX\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42824",
+    "created_at": "2025-12-11T22:46:49Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42824/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42824",
+    "labels": [],
+    "merged": false,
+    "number": 42824,
+    "review_comments_count": 6,
+    "state": "open",
+    "title": "Fix torch only support for fast Processors",
+    "updated_at": "2026-04-13T08:21:06Z"
+  },
+  {
+    "additions": 41,
+    "author": "kho",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? - Update the goldens for `test_model_integration_batched` - Add an integration test for LASR using pipe and chunked decoding. This is separated from https://github.com/huggingface/transformers/pull/42720 per request\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 26,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42823",
+    "created_at": "2025-12-11T20:37:51Z",
+    "deletions": 22,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42823/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42823",
+    "labels": [],
+    "merged": true,
+    "number": 42823,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Add an integration test for LASR using pipe and chunked decoding",
+    "updated_at": "2026-03-12T10:15:44Z"
+  },
+  {
+    "additions": 24,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Merges the two kernel tests files into one file, and remove redundant tests",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42819",
+    "created_at": "2025-12-11T16:20:30Z",
+    "deletions": 122,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42819/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42819",
+    "labels": [],
+    "merged": true,
+    "number": 42819,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[kernels] Centralize kernels tests",
+    "updated_at": "2026-02-02T09:13:44Z"
+  },
+  {
+    "additions": 1393,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "<img width=\"354\" height=\"475\" alt=\"image\" src=\"https://github.com/user-attachments/assets/64dd14da-387b-454a-863b-c2e3c7ef965d\" /> - distributed training CI (https://github.com/huggingface/transformers/pull/42765) needs FSDP to be refactor\u2026",
+    "changed_files": 120,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42809",
+    "created_at": "2025-12-11T14:04:32Z",
+    "deletions": 990,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42809/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42809",
+    "labels": [],
+    "merged": true,
+    "number": 42809,
+    "review_comments_count": 39,
+    "state": "closed",
+    "title": "refactor + robusts tests for Tensor Parallel ",
+    "updated_at": "2026-02-02T14:26:40Z"
+  },
+  {
+    "additions": 5,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "It returns error `AttributeError: 'BlockMask' object has no attribute 'dtype'` when running test case: `pytest -rA tests/models/lasr/test_modeling_lasr.py::LasrEncoderModelTest::test_flex_attention_with_grads`. This PR will fix this bug. @\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42802",
+    "created_at": "2025-12-11T09:59:50Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42802/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42802",
+    "labels": [],
+    "merged": true,
+    "number": 42802,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "fix error: 'BlockMask' object has no attribute 'dtype' for lasr model",
+    "updated_at": "2026-04-13T02:41:06Z"
+  },
+  {
+    "additions": 3,
+    "author": "npurson",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42797",
+    "created_at": "2025-12-11T08:17:52Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42797/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42797",
+    "labels": [],
+    "merged": false,
+    "number": 42797,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix LengthGroupedSampler compatibility with BatchFeature for multimodal models",
+    "updated_at": "2026-02-25T09:21:47Z"
+  },
+  {
+    "additions": 9337,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds https://huggingface.co/microsoft/VibeVoice-Realtime-0.5B Depends on #40546 cc @eustlb, @alvarobartt",
+    "changed_files": 46,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42781",
+    "created_at": "2025-12-10T18:55:33Z",
+    "deletions": 16,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/42781/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42781",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 42781,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add VibeVoice Realtime",
+    "updated_at": "2026-04-13T07:52:21Z"
+  },
+  {
+    "additions": 2,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Devstral",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42772",
+    "created_at": "2025-12-10T13:36:49Z",
+    "deletions": 3,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/42772/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42772",
+    "labels": [],
+    "merged": false,
+    "number": 42772,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "does this fix?",
+    "updated_at": "2026-01-27T10:53:34Z"
+  },
+  {
+    "additions": 18,
+    "author": "casinca",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42769",
+    "created_at": "2025-12-10T12:31:18Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42769/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42769",
+    "labels": [],
+    "merged": true,
+    "number": 42769,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix weight decay exclusions in `run_*_no\u2011trainer.py` examples",
+    "updated_at": "2026-02-12T16:25:08Z"
+  },
+  {
+    "additions": 618,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "it needs several PR: - Add FSDP v2 (with dtensor for now) natively to Transformers (not rely on transformer anymore) => needs to compare correctness through DDP - Check save/load of TP/FSDP/FSDP+TP - distributed training ci - use gloo back\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42765",
+    "created_at": "2025-12-10T11:06:27Z",
+    "deletions": 29,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/42765/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42765",
+    "labels": [],
+    "merged": false,
+    "number": 42765,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add distributed training CI",
+    "updated_at": "2026-02-19T10:46:18Z"
+  },
+  {
+    "additions": 21,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fix xpu output check for Ministral3 tests",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42761",
+    "created_at": "2025-12-10T07:42:20Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42761/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42761",
+    "labels": [],
+    "merged": true,
+    "number": 42761,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Fix xpu output check for Ministral3 tests",
+    "updated_at": "2026-04-20T02:29:48Z"
+  },
+  {
+    "additions": 330,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/42645 Depends on https://github.com/huggingface/transformers/pull/42667 Add robustness to processor handling of subprocessors loading and saving by checking t\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42668",
+    "created_at": "2025-12-05T23:21:37Z",
+    "deletions": 62,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42668/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42668",
+    "labels": [],
+    "merged": false,
+    "number": 42668,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "More robust processor from pretrained",
+    "updated_at": "2026-02-11T12:38:46Z"
+  },
+  {
+    "additions": 618,
+    "author": "YanivDorGalron",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Many discussions focused on speculative decoding with 'batch_size > 1' (#26875, #29769, #32165, #32189), but none were fully implemented. This PR does that. ### **Summary of Changes: Enable Batched Speculative Decod\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42655",
+    "created_at": "2025-12-05T14:58:11Z",
+    "deletions": 117,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42655/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42655",
+    "labels": [],
+    "merged": false,
+    "number": 42655,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "New Feature: Enabling Speculative Decoding with Batch Size > 1 (If draft and target model share tokenizer)",
+    "updated_at": "2026-02-26T07:42:45Z"
+  },
+  {
+    "additions": 27,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fix xpu tests in compressed tensors by mapping different expect outputs.",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42643",
+    "created_at": "2025-12-05T04:55:46Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42643/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42643",
+    "labels": [],
+    "merged": true,
+    "number": 42643,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Compress",
+    "updated_at": "2026-04-20T02:29:47Z"
+  },
+  {
+    "additions": 298,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This Pr cleans a bit the quantization methods class. I also updated the contribution doc",
+    "changed_files": 23,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42633",
+    "created_at": "2025-12-04T18:17:51Z",
+    "deletions": 1173,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42633/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42633",
+    "labels": [],
+    "merged": true,
+    "number": 42633,
+    "review_comments_count": 9,
+    "state": "closed",
+    "title": "small cleaning of quantization class ",
+    "updated_at": "2026-03-18T20:18:41Z"
+  },
+  {
+    "additions": 10,
+    "author": "arrdel",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #42585 This PR fixes a dimensional mismatch bug in the Mistral3 multimodal projector when using multiple vision feature layers. ## The Problem When `vision_feature_layer` is configured as a list (e.g., `[-1,\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42602",
+    "created_at": "2025-12-03T16:10:18Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42602/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42602",
+    "labels": [],
+    "merged": false,
+    "number": 42602,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix dimensional mismatch in Mistral3MultiModalProjector for multiple vision feature layers",
+    "updated_at": "2026-02-04T16:20:28Z"
+  },
+  {
+    "additions": 6561,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Add return_dict to get_text_features, get_image_features, get_audio_features, get_video_features methods to return 'BaseModelOutputWithPooling' by default Fixes #42401 The architectures supporting `get_image_featu\u2026",
+    "changed_files": 253,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42564",
+    "created_at": "2025-12-02T16:00:11Z",
+    "deletions": 4095,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42564/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42564",
+    "labels": [],
+    "merged": true,
+    "number": 42564,
+    "review_comments_count": 238,
+    "state": "closed",
+    "title": "[`BC`] Update `get_(text|image|audio|video)_features` methods to return `BaseModelOutputWithPooling`",
+    "updated_at": "2026-02-10T07:11:50Z"
+  },
+  {
+    "additions": 1274,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? 1. Update `TokenizersBackend` to simplify converting from sentencepiece. 2. Add the `model` attribute to guide the conversion. 3. Adds the `convert_from_spm` for special cases when the spm is not straight forward. 4\u2026",
+    "changed_files": 84,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42563",
+    "created_at": "2025-12-02T15:49:26Z",
+    "deletions": 2854,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42563/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42563",
+    "labels": [],
+    "merged": true,
+    "number": 42563,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Refactor-tokenization-more",
+    "updated_at": "2026-01-26T13:21:04Z"
+  },
+  {
+    "additions": 4105,
+    "author": "paultltc",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 24,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 26,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42504",
+    "created_at": "2025-12-01T08:37:48Z",
+    "deletions": 37,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42504/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42504",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 42504,
+    "review_comments_count": 213,
+    "state": "closed",
+    "title": "Add ModernVBERT models",
+    "updated_at": "2026-02-23T18:43:07Z"
+  },
+  {
+    "additions": 11,
+    "author": "Aravind-11",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds a `skip_post_init` parameter to `PreTrainedModel.from_pretrained()` to allow users to skip the post-initialization step that reinitializes model weights. This is essential for users who subclass models\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42497",
+    "created_at": "2025-11-30T00:43:11Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/42497/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42497",
+    "labels": [],
+    "merged": false,
+    "number": 42497,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add `skip_post_init` flag",
+    "updated_at": "2026-02-17T09:59:57Z"
+  },
+  {
+    "additions": 476,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "## :rotating_light: Breaking change - FA2 is only supported from version `2.3.3` and on This is due to the fact that this is older than 2+ years (we deprecate torch in 2 year cycles for example) as well as it giving fairly high maintenance\u2026",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42435",
+    "created_at": "2025-11-26T20:37:58Z",
+    "deletions": 246,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42435/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42435",
+    "labels": [],
+    "merged": true,
+    "number": 42435,
+    "review_comments_count": 20,
+    "state": "closed",
+    "title": ":rotating_light: [`FA4`] Initial support",
+    "updated_at": "2026-03-27T15:10:10Z"
+  },
+  {
+    "additions": 102,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "- Since v5 pushes, it introduces MoE refactor which break compatibility with VLLM. I dont think `torch.zeros_like(router_logits).scatter_(1, router_indices, router_top_value)` is needed (argument was it is needed during EP but if EP is ena\u2026",
+    "changed_files": 14,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42429",
+    "created_at": "2025-11-26T16:58:10Z",
+    "deletions": 14,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42429/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42429",
+    "labels": [
+      "for_v5?"
+    ],
+    "merged": false,
+    "number": 42429,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix transformers MoE compatibility with VLLM ",
+    "updated_at": "2026-01-26T15:10:35Z"
+  },
+  {
+    "additions": 33,
+    "author": "khushali9",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "What does this PR do? The ask is to use fp32_precision instead of allow_tf32 for Pytorch version >= 2.9.0 for CUDA as pointed out in this [doc](https://docs.pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devi\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42428",
+    "created_at": "2025-11-26T16:41:09Z",
+    "deletions": 21,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42428/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42428",
+    "labels": [],
+    "merged": true,
+    "number": 42428,
+    "review_comments_count": 10,
+    "state": "closed",
+    "title": "logic to select tf32 API as per Pytorch version",
+    "updated_at": "2026-04-06T05:13:47Z"
+  },
+  {
+    "additions": 844,
+    "author": "sambhavnoobcoder",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Problem Statement Flash Attention 4 represents a significant architectural shift in the flash-attention package: 1. **Different import path**: FA4 uses `flash_attn.cute` submodule instead of the main `flash_attn` package 2. **API incomp\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42404",
+    "created_at": "2025-11-25T17:30:39Z",
+    "deletions": 24,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42404/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42404",
+    "labels": [],
+    "merged": false,
+    "number": 42404,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add Flash Attention 4 (CuTe DSL) Support",
+    "updated_at": "2026-03-13T19:32:04Z"
+  },
+  {
+    "additions": 20,
+    "author": "HichTala",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This pull request makes a targeted change to the model loading logic in `modeling_utils.py` to improve compatibility with timm backbones. Specifically, it ensures that when a model is configured to use a timm backbo\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42284",
+    "created_at": "2025-11-19T15:18:05Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42284/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42284",
+    "labels": [],
+    "merged": false,
+    "number": 42284,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "Remove timm backbone weight loading to prevent meta-tensor warnings in `from_pretrained`",
+    "updated_at": "2026-02-05T10:59:42Z"
+  },
+  {
+    "additions": 1,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh @IlyasMoutawwakil pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42230",
+    "created_at": "2025-11-17T01:52:33Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42230/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42230",
+    "labels": [],
+    "merged": true,
+    "number": 42230,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "calls `AttentionMaskConverter._unmask_unattended` for xpu device before",
+    "updated_at": "2026-04-13T02:40:40Z"
+  },
+  {
+    "additions": 56,
+    "author": "FredHaa",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42227",
+    "created_at": "2025-11-16T13:10:20Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42227/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42227",
+    "labels": [],
+    "merged": false,
+    "number": 42227,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix whisper return language",
+    "updated_at": "2026-03-28T13:05:04Z"
+  },
+  {
+    "additions": 1955,
+    "author": "McClain-Thiel",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42210",
+    "created_at": "2025-11-14T12:33:32Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42210/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42210",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 42210,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[WIP] started adding support for evo2",
+    "updated_at": "2026-02-11T12:40:09Z"
+  },
+  {
+    "additions": 174,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Following https://github.com/huggingface/transformers/pull/41900. The mask is (and should!!) be correctly prepared, with the correct shape. If not, then it0s better to crash immediately, as otherwi\u2026",
+    "changed_files": 218,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42186",
+    "created_at": "2025-11-13T15:05:41Z",
+    "deletions": 395,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42186/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42186",
+    "labels": [],
+    "merged": true,
+    "number": 42186,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Remove mask slicing in all eager attentions",
+    "updated_at": "2026-02-10T17:45:59Z"
+  },
+  {
+    "additions": 2,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The CI seems a bit unstable and this test is the most common culprit! cc @ydshieh",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42184",
+    "created_at": "2025-11-13T14:12:46Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42184/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42184",
+    "labels": [],
+    "merged": true,
+    "number": 42184,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Mark test_prompt_lookup_decoding as flaky",
+    "updated_at": "2026-01-27T13:58:25Z"
+  },
+  {
+    "additions": 2517,
+    "author": "Chenhao-Guan",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Resolves #41862 Hi @zucchini-nlp and @Rocketknight1, Following your guidance in the issue, this PR re-implements the InternVL-Flash model as a completely separate model (instead of using an if flag in the existing InternVL class). Implemen\u2026",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42166",
+    "created_at": "2025-11-12T15:40:53Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42166/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42166",
+    "labels": [],
+    "merged": false,
+    "number": 42166,
+    "review_comments_count": 46,
+    "state": "open",
+    "title": "add internvl_flash model",
+    "updated_at": "2026-01-24T00:40:20Z"
+  },
+  {
+    "additions": 10,
+    "author": "Priyanshjain10",
+    "author_association": "NONE",
+    "body_excerpt": "\u2026better error messaging Fixes #42141 When loading models with AWQ quantization, users encountered unclear ModuleNotFoundError when the awq package structure didn't match expectations. This fix wraps the awq module imports in try-except blo\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42160",
+    "created_at": "2025-11-12T11:24:06Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42160/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42160",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 42160,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: add try-except error handling for AWQ module imports to provide \u2026",
+    "updated_at": "2026-03-18T14:11:35Z"
+  },
+  {
+    "additions": 163,
+    "author": "Aravind-11",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Implements SDPA for OWL VIT. Fixes #28103 ## Before submitting - [x] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case. - [ ]\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 67,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42136",
+    "created_at": "2025-11-10T23:38:23Z",
+    "deletions": 166,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42136/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42136",
+    "labels": [],
+    "merged": true,
+    "number": 42136,
+    "review_comments_count": 43,
+    "state": "closed",
+    "title": "Sdpa for owlvit",
+    "updated_at": "2026-03-17T19:59:55Z"
+  },
+  {
+    "additions": 1,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Addresses #41960 to update to torch 2.3",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42135",
+    "created_at": "2025-11-10T20:52:19Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42135/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42135",
+    "labels": [],
+    "merged": true,
+    "number": 42135,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[docs] Update torch version",
+    "updated_at": "2026-02-05T16:30:52Z"
+  },
+  {
+    "additions": 721,
+    "author": "AndresAlgaba",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Add feature described in Issue #42111 # What does this PR do? A built-in way to cap how many tokens a reasoning model spends inside its ``<think> \u2026 </think>`` block. Today, we can only control the total response length via ``max_new_tokens\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42112",
+    "created_at": "2025-11-09T10:17:17Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42112/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42112",
+    "labels": [],
+    "merged": false,
+    "number": 42112,
+    "review_comments_count": 17,
+    "state": "open",
+    "title": "Add max_thinking_tokens for reasoning models (issue #42111)",
+    "updated_at": "2026-02-02T10:51:15Z"
+  },
+  {
+    "additions": 1559,
+    "author": "rogeryoungh",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 41,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/42028",
+    "created_at": "2025-11-05T09:26:38Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/42028/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/42028",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 42028,
+    "review_comments_count": 56,
+    "state": "closed",
+    "title": "Add support for MiniMax-M2",
+    "updated_at": "2026-02-03T16:06:42Z"
+  },
+  {
+    "additions": 5038,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Edit: some PRs were opened taking pieces of this one, like #42697 and #42317 so now it's mostly about HfExporters \ud83e\udd17 This is an attempt at standardizing native transformers *support* of an export backend (dynamo, onn\u2026",
+    "changed_files": 82,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41992",
+    "created_at": "2025-11-03T14:20:21Z",
+    "deletions": 2192,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41992/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41992",
+    "labels": [],
+    "merged": false,
+    "number": 41992,
+    "review_comments_count": 41,
+    "state": "open",
+    "title": "[PoC] HF exporters",
+    "updated_at": "2026-04-13T08:41:05Z"
+  },
+  {
+    "additions": 40,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "In t5 model, as dtype of `self.wo.weight` is kept fp32 in [L783](https://github.com/huggingface/transformers/blob/v4.57.1/src/transformers/models/t5/modeling_t5.py#L783), `hidden_states` need to be converted to fp32 in some cases, we shoul\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41912",
+    "created_at": "2025-10-28T10:32:20Z",
+    "deletions": 15,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/41912/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41912",
+    "labels": [],
+    "merged": false,
+    "number": 41912,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "restore dtype of `hidden_states` in modeling_t5.py",
+    "updated_at": "2026-04-13T02:40:43Z"
+  },
+  {
+    "additions": 37,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41905",
+    "created_at": "2025-10-28T02:26:10Z",
+    "deletions": 39,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/41905/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41905",
+    "labels": [],
+    "merged": false,
+    "number": 41905,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "upgrade natten to 0.21 version",
+    "updated_at": "2026-04-13T02:40:51Z"
+  },
+  {
+    "additions": 4,
+    "author": "justinchuby",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Update pytree registration for DynamicCache. Before this change the cache values are flatten as `(key0, key1, ..., value0, value1, ...)`. This change matches the old cache interface by flattening to `(key0, value0,\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41901",
+    "created_at": "2025-10-27T18:36:11Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41901/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41901",
+    "labels": [],
+    "merged": false,
+    "number": 41901,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[executorch] Update pytree registration for DynamicCache",
+    "updated_at": "2026-01-27T12:26:40Z"
+  },
+  {
+    "additions": 4823,
+    "author": "binwang777",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? [FG-CLIP2](https://arxiv.org/abs/2510.10921) is a new generation of text-image cross-modal model excels in fine-grained discrimination and embedding. It is the foundation model for fine-grained vision-language under\u2026",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41886",
+    "created_at": "2025-10-27T08:43:19Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41886/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41886",
+    "labels": [],
+    "merged": false,
+    "number": 41886,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "ADD FG-CLIP2",
+    "updated_at": "2026-02-11T07:08:07Z"
+  },
+  {
+    "additions": 5543,
+    "author": "molbap",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title. Architecturally: Llava-next used as skeleton with a modified SamModel and a modified ClipVisionModel, keeping the deepseekV2 decoder untouched (using AutoModel) and changing using config only. - [x] Wo\u2026",
+    "changed_files": 20,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41797",
+    "created_at": "2025-10-22T21:04:03Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41797/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41797",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 41797,
+    "review_comments_count": 16,
+    "state": "open",
+    "title": "Add deepseek ocr",
+    "updated_at": "2026-03-01T09:13:59Z"
+  },
+  {
+    "additions": 2686,
+    "author": "kashif",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41763",
+    "created_at": "2025-10-21T10:43:25Z",
+    "deletions": 51,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41763/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41763",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 41763,
+    "review_comments_count": 111,
+    "state": "closed",
+    "title": "Timesfm 2.5",
+    "updated_at": "2026-02-27T11:59:54Z"
+  },
+  {
+    "additions": 271,
+    "author": "lakshinhemachandran",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR adds Tamil README.md documentation to make development with Hugging Face Transformers easier for Tamil developers. ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the ot\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41722",
+    "created_at": "2025-10-18T16:40:47Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41722/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41722",
+    "labels": [],
+    "merged": false,
+    "number": 41722,
+    "review_comments_count": 15,
+    "state": "closed",
+    "title": "Add Tamil README.md Documentation.",
+    "updated_at": "2026-03-19T20:29:16Z"
+  },
+  {
+    "additions": 9245,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR aims at refactoring ViT as part of an effort to standardize vision models in the library, similarly to https://github.com/huggingface/transformers/pull/41549 # Vision Models Refactoring - Chronological Todo\u2026",
+    "changed_files": 96,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 23,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41693",
+    "created_at": "2025-10-17T15:05:33Z",
+    "deletions": 5162,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41693/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41693",
+    "labels": [],
+    "merged": false,
+    "number": 41693,
+    "review_comments_count": 228,
+    "state": "open",
+    "title": "\ud83d\udea8 Refactor ViT to updated standards",
+    "updated_at": "2026-04-15T20:22:13Z"
+  },
+  {
+    "additions": 71,
+    "author": "jackzhxng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Makes Idefics 3 exportable by assuming batch size of 1, unrolling the loop to a single iteration. ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41629",
+    "created_at": "2025-10-15T16:35:37Z",
+    "deletions": 29,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/41629/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41629",
+    "labels": [
+      "torch export"
+    ],
+    "merged": false,
+    "number": 41629,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Make Idefics exportable, assume no padding",
+    "updated_at": "2026-02-10T04:39:12Z"
+  },
+  {
+    "additions": 6,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@MekkCyber @drbh pls help review, thx! We did a benchmark using following scripts on Intel XPU PVC Max 1550: ``` import gc import torch import time import transformers from transformers import AutoConfig, AutoProcessor, GenerationConfig, s\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41563",
+    "created_at": "2025-10-14T05:49:13Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41563/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41563",
+    "labels": [],
+    "merged": true,
+    "number": 41563,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add rmsnorm kernels support for Intel XPU",
+    "updated_at": "2026-04-13T02:41:39Z"
+  },
+  {
+    "additions": 8332,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR aims at refactoring DETR as part of an effort to standardize vision models in the library, in the same vein as https://github.com/huggingface/transformers/pull/41546. Expect to see much more PRs like this fo\u2026",
+    "changed_files": 30,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41549",
+    "created_at": "2025-10-13T16:57:33Z",
+    "deletions": 5511,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41549/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41549",
+    "labels": [],
+    "merged": true,
+    "number": 41549,
+    "review_comments_count": 149,
+    "state": "closed",
+    "title": "\ud83d\udea8 Refactor DETR to updated standards",
+    "updated_at": "2026-02-02T23:42:50Z"
+  },
+  {
+    "additions": 139,
+    "author": "molbap",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title. Adds the nice recent modeling utils to CLIP. Was motivated to be able to use it in the looong-standing #33962 .",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41546",
+    "created_at": "2025-10-13T15:21:26Z",
+    "deletions": 301,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41546/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41546",
+    "labels": [],
+    "merged": true,
+    "number": 41546,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Modernize CLIP modeling code ",
+    "updated_at": "2026-02-06T08:14:36Z"
+  },
+  {
+    "additions": 1483,
+    "author": "hainan-xv",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Parakeet TDT model integration. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41545",
+    "created_at": "2025-10-13T13:46:50Z",
+    "deletions": 27,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41545/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41545",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 41545,
+    "review_comments_count": 42,
+    "state": "closed",
+    "title": "TDT for HF",
+    "updated_at": "2026-03-09T13:04:40Z"
+  },
+  {
+    "additions": 11464,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "- standardizes all model docs to include a generated summary of the abstract/blog, a `Pipeline` and `AutoModel` or `ModelForTask` example and usage tips - updates `add_dates.py` to also add contributor names at the top - removes PyTorch ba\u2026",
+    "changed_files": 423,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41459",
+    "created_at": "2025-10-08T18:36:51Z",
+    "deletions": 28592,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41459/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41459",
+    "labels": [],
+    "merged": false,
+    "number": 41459,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[docs] Standardize model docs",
+    "updated_at": "2026-02-11T20:56:26Z"
+  },
+  {
+    "additions": 15,
+    "author": "shawntan",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds ScatterMoE kernel support for Granite MoE models. Started in #40365 but has significantly deviated in approach, so starting a new pull request. ## Before submitting - [x] Did you read the [contributor guideline\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41458",
+    "created_at": "2025-10-08T18:05:32Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41458/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41458",
+    "labels": [],
+    "merged": false,
+    "number": 41458,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Adding ScatterMoE kernel support for Granite models. ",
+    "updated_at": "2026-01-29T21:23:11Z"
+  },
+  {
+    "additions": 6,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR enhance the handling of Union types containing `str` by two rules: 1. If `str` in `Union` and `Union` has more than one other types pass `str` to `parser.add_argument`. The caller is responsible to convert t\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41441",
+    "created_at": "2025-10-08T10:41:51Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41441/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41441",
+    "labels": [],
+    "merged": false,
+    "number": 41441,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Enhance the handling of Union types in HfArgumentParser",
+    "updated_at": "2026-02-11T12:33:03Z"
+  },
+  {
+    "additions": 40,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR changes the default for reporting to \"none\" as we move to v5. Moreover, we clean some deprecations messages around some env env + we deprecate `logging_dir` that was solely used for tensorboard. Instead the\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41375",
+    "created_at": "2025-10-06T15:00:27Z",
+    "deletions": 115,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41375/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41375",
+    "labels": [],
+    "merged": true,
+    "number": 41375,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "`report_to` default changed to \"none\" + cleaning deprecated env var",
+    "updated_at": "2026-02-20T17:18:40Z"
+  },
+  {
+    "additions": 512,
+    "author": "Vinayaktoor",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR introduces the DEIMv2 model along with its configuration, image processor, and unit tests. The changes include: Model Implementation: Added Deimv2ForObjectDetection supporting object detection tasks. Configu\u2026",
+    "changed_files": 10,
+    "cluster_id": "cluster-41211-3",
+    "cluster_ids": [
+      "cluster-41211-3"
+    ],
+    "cluster_role": "member",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41356",
+    "created_at": "2025-10-05T21:35:01Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41356/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41356",
+    "labels": [],
+    "merged": false,
+    "number": 41356,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add DEIMv2 model, image processor, and basic tests",
+    "updated_at": "2026-02-09T17:25:26Z"
+  },
+  {
+    "additions": 558,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds (Full) Regex and (Partial) Tokenization GIL=0 free-threading support. Tested up to Python 3.14T. In simple terms, Transformers code that relies on regex will segfault under true concurrency. I have confirmed wi\u2026",
+    "changed_files": 35,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 23,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41329",
+    "created_at": "2025-10-03T18:23:49Z",
+    "deletions": 52,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41329/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41329",
+    "labels": [],
+    "merged": false,
+    "number": 41329,
+    "review_comments_count": 6,
+    "state": "open",
+    "title": "Fix GIL=0 segfault and Add GIL=0 compat for regex paths",
+    "updated_at": "2026-03-23T01:33:46Z"
+  },
+  {
+    "additions": 45,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? PyTorch 2.10 has been released. Looking forward to lift the PT minimum version again.",
+    "changed_files": 70,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41307",
+    "created_at": "2025-10-03T07:46:41Z",
+    "deletions": 416,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41307/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41307",
+    "labels": [],
+    "merged": true,
+    "number": 41307,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update torch minimum version to 2.4",
+    "updated_at": "2026-02-05T00:09:38Z"
+  },
+  {
+    "additions": 3323,
+    "author": "zbloss",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds the new Hierarchical Reasoning Model. Fixes #41271 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [contributor\u2026",
+    "changed_files": 13,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41272",
+    "created_at": "2025-10-01T23:54:41Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41272/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41272",
+    "labels": [],
+    "merged": false,
+    "number": 41272,
+    "review_comments_count": 56,
+    "state": "open",
+    "title": "feat: Add HRM Model",
+    "updated_at": "2026-02-11T12:33:57Z"
+  },
+  {
+    "additions": 1504,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? ```python from transformers import FineGrainedFP8Config, AutoModelForCausalLM, AutoTokenizer import torch model_name = \"deepseek-ai/DeepSeek-V3.2\" quantization_config = FineGrainedFP8Config( modules_to_not_convert=[\u2026",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41251",
+    "created_at": "2025-10-01T12:41:21Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41251/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41251",
+    "labels": [],
+    "merged": false,
+    "number": 41251,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add deepseek 3.2 exp",
+    "updated_at": "2026-04-14T16:49:12Z"
+  },
+  {
+    "additions": 22671,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title. Continues from https://github.com/huggingface/transformers/pull/40793 and supersedes https://github.com/huggingface/transformers/pull/36534 NOTE: config classes can't accept positional args anymore! I\u2026",
+    "changed_files": 891,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41250",
+    "created_at": "2025-10-01T11:50:47Z",
+    "deletions": 39076,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41250/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41250",
+    "labels": [],
+    "merged": true,
+    "number": 41250,
+    "review_comments_count": 25,
+    "state": "closed",
+    "title": ":rotating_light: Validate config attributes",
+    "updated_at": "2026-03-27T16:19:40Z"
+  },
+  {
+    "additions": 217,
+    "author": "dimidagd",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "@yonigozlan @molbap",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 25,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41224",
+    "created_at": "2025-09-30T13:00:02Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41224/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41224",
+    "labels": [],
+    "merged": false,
+    "number": 41224,
+    "review_comments_count": 19,
+    "state": "open",
+    "title": "Add DINOv3ViTForImageClassification support",
+    "updated_at": "2026-03-12T09:06:39Z"
+  },
+  {
+    "additions": 3340,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds EoMT with a DINOv3 backbone. The authors of EoMT released new checkpoints which swap the DINOv2 backbone by the newer DINOv3: https://github.com/tue-mps/eomt/blob/master/model_zoo/dinov3.md ## Disclaime\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41212",
+    "created_at": "2025-09-30T09:55:46Z",
+    "deletions": 105,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41212/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41212",
+    "labels": [],
+    "merged": true,
+    "number": 41212,
+    "review_comments_count": 81,
+    "state": "closed",
+    "title": "Add EoMT with DINOv3 backbone",
+    "updated_at": "2026-02-02T16:09:05Z"
+  },
+  {
+    "additions": 47,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41145",
+    "created_at": "2025-09-25T04:42:12Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/41145/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41145",
+    "labels": [],
+    "merged": false,
+    "number": 41145,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add Rope kernel support to Qwen3 model",
+    "updated_at": "2026-04-13T02:40:45Z"
+  },
+  {
+    "additions": 11,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? It works by refactoring `_get_unpad_data`",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41097",
+    "created_at": "2025-09-23T10:51:04Z",
+    "deletions": 14,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/41097/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41097",
+    "labels": [],
+    "merged": false,
+    "number": 41097,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Delay and probably avoid unnecessary graph breaks in _upad_input of modeling_flash_attention_utils.py",
+    "updated_at": "2026-02-11T12:31:59Z"
+  },
+  {
+    "additions": 5199,
+    "author": "hainan-xv",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 27,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/41061",
+    "created_at": "2025-09-22T14:27:57Z",
+    "deletions": 10,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/41061/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/41061",
+    "labels": [],
+    "merged": false,
+    "number": 41061,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Tdt support",
+    "updated_at": "2026-03-09T13:02:23Z"
+  },
+  {
+    "additions": 621,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? This has been a long-standing feature request, and I'd love to hear your thoughts on how I approached resolving it. I\u2019ve tested the implementation on the GLUE benchmark and added unit tests for the new logic; happ\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40995",
+    "created_at": "2025-09-19T08:51:48Z",
+    "deletions": 47,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/40995/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40995",
+    "labels": [],
+    "merged": false,
+    "number": 40995,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "feat(trainer): Add support for multiple datasets and domain-specific loss",
+    "updated_at": "2026-02-20T17:20:26Z"
+  },
+  {
+    "additions": 5927,
+    "author": "AkshatSh",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# Perceptron Isaac Implementation Perceptron released open weight models [Isaac-0.1](https://huggingface.co/PerceptronAI/Isaac-0.1) and [Isaac-0.1-Base](https://huggingface.co/PerceptronAI/Isaac-0.1-Base) a 2B dense model for perception.",
+    "changed_files": 21,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40962",
+    "created_at": "2025-09-18T07:05:39Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40962/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40962",
+    "labels": [],
+    "merged": false,
+    "number": 40962,
+    "review_comments_count": 462,
+    "state": "open",
+    "title": "perceptron: Isaac-0.1 implementation",
+    "updated_at": "2026-04-14T14:58:41Z"
+  },
+  {
+    "additions": 25,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "A first time contributor to HF here! ### What does this PR do? \u2192 Fixes a `RuntimeError` that occurs when loading the Gemma 3n model with an outdated version of the `timm` library; it's caused by the absence of the required `mobilenetv5_300\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40951",
+    "created_at": "2025-09-17T20:29:13Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40951/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40951",
+    "labels": [],
+    "merged": true,
+    "number": 40951,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "fix(timm): Catch 'Unknown model' RuntimeError in the Gemma 3n MobileNetV5 vision encoder",
+    "updated_at": "2026-02-23T10:04:28Z"
+  },
+  {
+    "additions": 598,
+    "author": "ahadnagy",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds bert, gemma3, gpt, mistral3 and qwen2 to the new benchmarking pipeline. ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ]\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40820",
+    "created_at": "2025-09-11T14:07:36Z",
+    "deletions": 69,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40820/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40820",
+    "labels": [],
+    "merged": false,
+    "number": 40820,
+    "review_comments_count": 7,
+    "state": "open",
+    "title": "Add models to benchmarks",
+    "updated_at": "2026-02-11T12:31:34Z"
+  },
+  {
+    "additions": 3792,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Apart from obvious tf/jax support, I believe the following should be the only potential breaking changes to torch-only code: - pipelines do not take `framework` argument anymore - onnx config methods do not take `fr\u2026",
+    "changed_files": 854,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40760",
+    "created_at": "2025-09-09T08:32:43Z",
+    "deletions": 181269,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40760/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40760",
+    "labels": [
+      "for_v5?"
+    ],
+    "merged": true,
+    "number": 40760,
+    "review_comments_count": 13,
+    "state": "closed",
+    "title": "\ud83d\udea8\ud83d\udea8\ud83d\udea8 Fully remove Tensorflow and Jax support library-wide",
+    "updated_at": "2026-01-27T13:47:25Z"
+  },
+  {
+    "additions": 1911,
+    "author": "kashif",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add support for forecasting with Covariates",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40755",
+    "created_at": "2025-09-08T13:05:54Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40755/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40755",
+    "labels": [],
+    "merged": false,
+    "number": 40755,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[TimesFM] Add support for forecasting with covariates",
+    "updated_at": "2026-03-06T08:29:28Z"
+  },
+  {
+    "additions": 142,
+    "author": "moonrunnerkc",
+    "author_association": "NONE",
+    "body_excerpt": "This PR fixes incorrect loss normalization in the Trainer when running on multiple GPUs. The previous implementation always averaged losses, which under-reported values in token-level training. The new implementation provides a clean, toke\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40747",
+    "created_at": "2025-09-07T17:42:44Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40747/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40747",
+    "labels": [],
+    "merged": false,
+    "number": 40747,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: Proper loss aggregation in Trainer with token-aware reduction",
+    "updated_at": "2026-02-18T14:54:29Z"
+  },
+  {
+    "additions": 79,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Always import torch in some source files that are PT only.",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40742",
+    "created_at": "2025-09-07T06:16:08Z",
+    "deletions": 72,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/40742/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40742",
+    "labels": [],
+    "merged": false,
+    "number": 40742,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Assume torch in certain files",
+    "updated_at": "2026-02-04T14:09:05Z"
+  },
+  {
+    "additions": 3680,
+    "author": "pengzhiliang",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Merge the model from https://github.com/microsoft/VibeVoice/tree/main HF: https://huggingface.co/microsoft/VibeVoice-1.5B",
+    "changed_files": 30,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 25,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40546",
+    "created_at": "2025-08-29T12:47:34Z",
+    "deletions": 18,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40546/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40546",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 40546,
+    "review_comments_count": 129,
+    "state": "open",
+    "title": "Implement VibeVoice ",
+    "updated_at": "2026-04-13T07:52:31Z"
+  },
+  {
+    "additions": 229,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Not sure how this got in here, but adapt aria cross attention to our standards",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40452",
+    "created_at": "2025-08-26T08:49:07Z",
+    "deletions": 123,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40452/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40452",
+    "labels": [],
+    "merged": false,
+    "number": 40452,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "update aria to remove wrong attention usage",
+    "updated_at": "2026-01-27T10:52:51Z"
+  },
+  {
+    "additions": 180,
+    "author": "Ahmed-G-ElTaher",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Pull Request: Add Support for Handling Overlapping Annotations in Mask2Former ### Problem The current image processing pipeline for Mask2Former doesn't handle overlapping annotations correctly. When annotations overlap, the processing o\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40446",
+    "created_at": "2025-08-26T02:50:39Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40446/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40446",
+    "labels": [],
+    "merged": false,
+    "number": 40446,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add convert_segmentation_map_to_binary_masks_sorted function for hand\u2026",
+    "updated_at": "2026-03-24T11:03:16Z"
+  },
+  {
+    "additions": 2,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? In vLLm inference and SFT training, there are lots of blocking operations of image_grid_thw such as `torch.prod` and `torch.tolist`, so let's always fix image_grid_thw to CPU to avoid them. A simple grep gives the f\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40394",
+    "created_at": "2025-08-23T06:09:27Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40394/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40394",
+    "labels": [],
+    "merged": false,
+    "number": 40394,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix image_grid_thw to be in CPU",
+    "updated_at": "2026-02-18T01:05:14Z"
+  },
+  {
+    "additions": 1937,
+    "author": "thisisiron",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for Ovis2.5, a multimodal vision-language model designed for native-resolution visual perception and reflective reasoning. Ovis2.5 is released in two variants: - [Ovis2.5-9B](https://huggingface\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40321",
+    "created_at": "2025-08-20T15:17:54Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40321/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40321",
+    "labels": [],
+    "merged": false,
+    "number": 40321,
+    "review_comments_count": 12,
+    "state": "closed",
+    "title": "[WIP] Add Ovis2.5 ",
+    "updated_at": "2026-03-30T06:56:12Z"
+  },
+  {
+    "additions": 6127,
+    "author": "szhengac",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 39,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40294",
+    "created_at": "2025-08-19T19:56:16Z",
+    "deletions": 62,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40294/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40294",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": true,
+    "number": 40294,
+    "review_comments_count": 307,
+    "state": "closed",
+    "title": "Add Higgs Audio V2 Model",
+    "updated_at": "2026-02-20T03:18:40Z"
+  },
+  {
+    "additions": 73,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This ensures that the naming is always the same across MoEs",
+    "changed_files": 21,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40156",
+    "created_at": "2025-08-14T09:19:02Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40156/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40156",
+    "labels": [],
+    "merged": false,
+    "number": 40156,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "add attribute map for MoE standardization",
+    "updated_at": "2026-01-27T11:18:53Z"
+  },
+  {
+    "additions": 50,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Use RMSNorm from PyTorch 2.3+.",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40066",
+    "created_at": "2025-08-10T08:43:09Z",
+    "deletions": 67,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40066/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40066",
+    "labels": [],
+    "merged": true,
+    "number": 40066,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Change Qwen2RMSNorm to RMSNorm from PyTorch",
+    "updated_at": "2026-03-10T06:16:28Z"
+  },
+  {
+    "additions": 1,
+    "author": "wenboqian",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/40048",
+    "created_at": "2025-08-09T00:30:37Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/40048/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/40048",
+    "labels": [],
+    "merged": false,
+    "number": 40048,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix Inefficient default GELU implementation in GPT2 #39073",
+    "updated_at": "2026-02-17T13:31:39Z"
+  },
+  {
+    "additions": 4080,
+    "author": "MHRDYN7",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #39893. This pr adds the VideoPrism model by google deepmind. [Original repo](https://github.com/google-deepmind/videoprism)",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 67,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39895",
+    "created_at": "2025-08-04T17:37:52Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39895/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39895",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 39895,
+    "review_comments_count": 193,
+    "state": "open",
+    "title": "Add Videoprism",
+    "updated_at": "2026-04-15T10:19:21Z"
+  },
+  {
+    "additions": 134,
+    "author": "matthewdouglas",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds a new option to `BitsAndBytesConfig` called `target_parameters` with the same spirit as `target_parameters` in huggingface/peft#2638. The intent is to allow quantization of `nn.Parameter` that are not w\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39859",
+    "created_at": "2025-08-01T22:24:18Z",
+    "deletions": 25,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/39859/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39859",
+    "labels": [],
+    "merged": false,
+    "number": 39859,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "WIP: Initial support for bnb 4bit on any nn.Parameter",
+    "updated_at": "2026-03-03T16:44:09Z"
+  },
+  {
+    "additions": 1,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Meta just released [MetaCLIP 2 (worldwide)](https://github.com/facebookresearch/MetaCLIP?tab=readme-ov-file#pre-trained-models), new CLIP models trained on 300+ languages. However, when making them compatible with `\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39821",
+    "created_at": "2025-07-31T13:41:06Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39821/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39821",
+    "labels": [],
+    "merged": false,
+    "number": 39821,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Support MetaCLIP 2",
+    "updated_at": "2026-03-06T02:20:04Z"
+  },
+  {
+    "additions": 1941,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds the Llasa TTS family of models: - 1B: https://huggingface.co/HKUSTAudio/Llasa-1B - 3B: https://huggingface.co/HKUSTAudio/Llasa-3B - 8B: https://huggingface.co/HKUSTAudio/Llasa-8B Reproducers for integra\u2026",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39760",
+    "created_at": "2025-07-29T14:42:04Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/39760/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39760",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 39760,
+    "review_comments_count": 7,
+    "state": "open",
+    "title": "[Draft] Add Llasa TTS family of models",
+    "updated_at": "2026-04-13T07:52:40Z"
+  },
+  {
+    "additions": 4020,
+    "author": "hhaAndroid",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# Adding Intern-S1 This PR adds the support of codes for the Intern-S1 models. Please visit https://huggingface.co/internlm/Intern-S1 ## Features - Strong performance across language and vision reasoning benchmarks, especially scientific t\u2026",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 44,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39722",
+    "created_at": "2025-07-28T07:20:05Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39722/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39722",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 39722,
+    "review_comments_count": 140,
+    "state": "open",
+    "title": "[Feat] Adding Intern-S1",
+    "updated_at": "2026-02-11T12:39:49Z"
+  },
+  {
+    "additions": 23,
+    "author": "bonpiedlaroute",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# Description Fixes #39191 where transformers ignored accelerate configuration to disable torch.dynamo, leading to unexpected compilation and `FailOnRecompileLimitHit` errors. ## Problem When users configure accelerate to disable torch.dyn\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39683",
+    "created_at": "2025-07-25T21:27:34Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39683/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39683",
+    "labels": [],
+    "merged": false,
+    "number": 39683,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Fix issue #39191 respect accelerate config to disable torch.dynamo compilation",
+    "updated_at": "2026-01-27T12:42:58Z"
+  },
+  {
+    "additions": 1986,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39455",
+    "created_at": "2025-07-16T15:34:06Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39455/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39455",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 39455,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "Add eurobert",
+    "updated_at": "2026-03-04T11:43:04Z"
+  },
+  {
+    "additions": 3367,
+    "author": "Manalelaidouni",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR aims at integrating `Vocos` model to `transformers`. Vocos is a neural vocoder designed for high quality audio synthesis in TTS pipelines and related tasks, outpeforms `HifiGan` and it is significantly faste\u2026",
+    "changed_files": 32,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 94,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39403",
+    "created_at": "2025-07-14T18:25:37Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39403/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39403",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 39403,
+    "review_comments_count": 167,
+    "state": "open",
+    "title": "Add Vocos model",
+    "updated_at": "2026-01-26T23:34:09Z"
+  },
+  {
+    "additions": 3,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh pls help review, thx",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39390",
+    "created_at": "2025-07-14T02:43:59Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39390/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39390",
+    "labels": [],
+    "merged": false,
+    "number": 39390,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix CI bug for shieldgemma2 model",
+    "updated_at": "2026-04-13T02:40:49Z"
+  },
+  {
+    "additions": 51,
+    "author": "z-pandeyji",
+    "author_association": "NONE",
+    "body_excerpt": "@amyeroberts @Narsil ## Fixes #31356 **Issue**: Object Detection Pipeline only outputs first element when batching ## Solution The Object Detection Pipeline now correctly handles batched inputs, returning results for all images in the batc\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39306",
+    "created_at": "2025-07-09T13:16:29Z",
+    "deletions": 19,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39306/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39306",
+    "labels": [],
+    "merged": false,
+    "number": 39306,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix batch object detection 31356",
+    "updated_at": "2026-04-10T13:14:50Z"
+  },
+  {
+    "additions": 15,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "In original test case, there is no semantic association between input prompt and expected text. This PR changes the input and expected output according to different platforms.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39190",
+    "created_at": "2025-07-03T08:40:16Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39190/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39190",
+    "labels": [],
+    "merged": true,
+    "number": 39190,
+    "review_comments_count": 11,
+    "state": "closed",
+    "title": "adjust input and output texts for test_modeling_recurrent_gemma.py",
+    "updated_at": "2026-04-13T02:41:15Z"
+  },
+  {
+    "additions": 4,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "In this PR: [36132](https://github.com/huggingface/transformers/pull/36132/files#diff-ed55888e6665791fe92cc8fc0c499da54f4ace6738551cd9a2591881cda076deR2366), when we use FSDP, it will not use accelerator to prepare model, which will lead t\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39177",
+    "created_at": "2025-07-02T15:15:08Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39177/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39177",
+    "labels": [],
+    "merged": true,
+    "number": 39177,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "fix bug using FSDP V1 will lead to model device not properly set",
+    "updated_at": "2026-04-13T02:41:19Z"
+  },
+  {
+    "additions": 14,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@SunMarc @MekkCyber, pls help review. When we use sample code on XPU like: ``` import torch from transformers import AutoModelForCausalLM, AutoTokenizer model_id = \"hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4\" tokenizer = AutoToken\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39144",
+    "created_at": "2025-07-01T09:18:24Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39144/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39144",
+    "labels": [],
+    "merged": false,
+    "number": 39144,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix bug when using gptq model on xpu device",
+    "updated_at": "2026-04-13T02:41:18Z"
+  },
+  {
+    "additions": 2505,
+    "author": "manueldeprada",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR completes **Part 1** of the cache refactor tracked in #38077. ### Summary: * Now `Cache` is structured in a list of layers. * Ports all existing cache types (Static, Dynamic, Offloaded, Quantized, Hybrid, etc.) to use layer composi\u2026",
+    "changed_files": 64,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39106",
+    "created_at": "2025-06-29T09:54:19Z",
+    "deletions": 2167,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39106/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39106",
+    "labels": [],
+    "merged": true,
+    "number": 39106,
+    "review_comments_count": 117,
+    "state": "closed",
+    "title": "[cache refactor] Move all the caching logic to a per-layer approach",
+    "updated_at": "2026-03-18T11:16:06Z"
+  },
+  {
+    "additions": 26,
+    "author": "ethanknights",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Improves the clarity of the `PyTorch` examples READMEs for image-classification and image-pretraining. ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/39094",
+    "created_at": "2025-06-27T22:12:42Z",
+    "deletions": 29,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/39094/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/39094",
+    "labels": [],
+    "merged": false,
+    "number": 39094,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: PyTorch examples (image-classification & image-pretraining) clarity",
+    "updated_at": "2026-03-07T12:12:25Z"
+  },
+  {
+    "additions": 18,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/38938",
+    "created_at": "2025-06-20T09:58:55Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/38938/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/38938",
+    "labels": [],
+    "merged": false,
+    "number": 38938,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "fix bug when using DP in trl, the batch size of input and output dism\u2026",
+    "updated_at": "2026-04-13T02:40:47Z"
+  },
+  {
+    "additions": 9,
+    "author": "ydshieh2",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/38784",
+    "created_at": "2025-06-12T08:51:40Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/38784/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/38784",
+    "labels": [],
+    "merged": false,
+    "number": 38784,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "break style",
+    "updated_at": "2026-01-28T15:11:48Z"
+  },
+  {
+    "additions": 292,
+    "author": "gspeter-max",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #38258 ## Description This PR resolves an issue where keyword arguments passed to `from_pretrained` or `from_config` for composite models were not being correctly routed to the respective sub-configs. This would lead to a `TypeError`\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/38672",
+    "created_at": "2025-06-08T06:51:48Z",
+    "deletions": 76,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/38672/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/38672",
+    "labels": [],
+    "merged": false,
+    "number": 38672,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "AutoConfig has potential issue with composite config. #38258 solved",
+    "updated_at": "2026-02-10T04:10:04Z"
+  },
+  {
+    "additions": 30,
+    "author": "gspeter-max",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #38268 ### Feature Description This PR implements the feature request to add sampling capabilities (e.g., Top-K, Top-P, temperature) to Group Beam Search, which was previously a purely greedy algorithm. ### Problem Currently, `_group\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/38653",
+    "created_at": "2025-06-07T04:17:07Z",
+    "deletions": 8,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/38653/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/38653",
+    "labels": [],
+    "merged": false,
+    "number": 38653,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add sampling support to group beam search ",
+    "updated_at": "2026-02-10T04:10:04Z"
+  },
+  {
+    "additions": 30,
+    "author": "gspeter-max",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #38468 ### Problem Description This PR addresses the `AssertionError: \"Torch not compiled with CUDA enabled\"` that occurs when attempting to load models using `device_map=\"auto\"` on systems with Ascend NPU hardware (Huawei NPU). The\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/38591",
+    "created_at": "2025-06-04T18:39:31Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/38591/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/38591",
+    "labels": [],
+    "merged": false,
+    "number": 38591,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix: Correctly handle integer device_map for NPU devices in _load_sta\u2026",
+    "updated_at": "2026-02-10T04:10:03Z"
+  },
+  {
+    "additions": 5030,
+    "author": "helboukkouri",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "> NOTE: I'll take care of all the details when I'm done with the code part. # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release not\u2026",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/37950",
+    "created_at": "2025-05-04T16:44:11Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/37950/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/37950",
+    "labels": [],
+    "merged": false,
+    "number": 37950,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add character-bert from cli",
+    "updated_at": "2026-02-20T22:00:13Z"
+  },
+  {
+    "additions": 6,
+    "author": "lynashere",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes the do_sample default issue. It should normally be set to `False` as default but it was reported that if it's not explicitly defined, it defaults to `True`. Fixes #37891 ## Before submitting - [ ] This PR fixe\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/37901",
+    "created_at": "2025-04-30T19:49:17Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/37901/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/37901",
+    "labels": [],
+    "merged": false,
+    "number": 37901,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix-do_sample-default",
+    "updated_at": "2026-03-02T22:36:21Z"
+  },
+  {
+    "additions": 5307,
+    "author": "Deep-unlearning",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds support for [XCodec2](https://github.com/zhenye234/X-Codec-2.0) a high fidelity general neural audio codec used in [Llasa](https://huggingface.co/collections/HKUSTAudio/llasa-679b87dbd06ac556cc0e0f44) a\u2026",
+    "changed_files": 22,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/37868",
+    "created_at": "2025-04-29T14:58:31Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/37868/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/37868",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 37868,
+    "review_comments_count": 104,
+    "state": "closed",
+    "title": "Add xcodec2 model",
+    "updated_at": "2026-02-17T09:10:23Z"
+  },
+  {
+    "additions": 10,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/37142",
+    "created_at": "2025-03-31T13:48:19Z",
+    "deletions": 84,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/37142/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/37142",
+    "labels": [],
+    "merged": true,
+    "number": 37142,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[qwen3] fix generation tests",
+    "updated_at": "2026-03-20T02:38:27Z"
+  },
+  {
+    "additions": 30,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/37026",
+    "created_at": "2025-03-27T02:21:16Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/37026/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/37026",
+    "labels": [],
+    "merged": true,
+    "number": 37026,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix: AttributeError: 'LlavaProcessor' object has no attribute 'image_token_id'",
+    "updated_at": "2026-02-20T07:32:34Z"
+  },
+  {
+    "additions": 1158,
+    "author": "zyxciss",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# **[\ud83d\ude80 Add Support for Sapnous-VR-6B Model]** ## **What does this PR do?** This PR **adds support for the Sapnous-VR-6B model**, a transformer-based language model designed for multimodal reasoning and enhanced causal text generation. ## H\u2026",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/36952",
+    "created_at": "2025-03-25T08:06:15Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/36952/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/36952",
+    "labels": [],
+    "merged": false,
+    "number": 36952,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Added Sapnous Architecture",
+    "updated_at": "2026-04-08T03:25:25Z"
+  },
+  {
+    "additions": 5464,
+    "author": "sbucaille",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Implements RF-DETR Fixes #36879 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://gith\u2026",
+    "changed_files": 22,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 35,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/36895",
+    "created_at": "2025-03-21T22:28:16Z",
+    "deletions": 46,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/36895/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/36895",
+    "labels": [
+      "New model",
+      "Vision"
+    ],
+    "merged": false,
+    "number": 36895,
+    "review_comments_count": 347,
+    "state": "open",
+    "title": "Add RF-DETR",
+    "updated_at": "2026-04-13T23:57:11Z"
+  },
+  {
+    "additions": 7658,
+    "author": "konstantinos-p",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR introduces the **DINO DETR (DEtection TRansformer with DIstillation)** model (https://arxiv.org/abs/2203.03605) to the Hugging Face Transformers library. DINO DETR is a state-of-the-art object detection mod\u2026",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 34,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/36711",
+    "created_at": "2025-03-14T06:35:06Z",
+    "deletions": 37,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/36711/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/36711",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 36711,
+    "review_comments_count": 73,
+    "state": "open",
+    "title": "[WIP] Add DINO DETR Model to HuggingFace Transformers",
+    "updated_at": "2026-04-13T21:41:16Z"
+  },
+  {
+    "additions": 148,
+    "author": "apehex",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This is a simple implementation of a byte tokenizer. It is useful for models like the recent [BLT from Meta](https://arxiv.org/abs/2412.09871). It accepts all the encoding schemes from the built-in `encode` function\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/36216",
+    "created_at": "2025-02-15T13:06:20Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/36216/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/36216",
+    "labels": [],
+    "merged": false,
+    "number": 36216,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[WIP] Add a dedicated tokenizer for byte level transformers",
+    "updated_at": "2026-02-22T08:23:11Z"
+  },
+  {
+    "additions": 2279,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds [StyleTTS 2](https://huggingface.co/papers/2306.07691) to support the original model but also other checkpoints like [Kokoro](https://huggingface.co/hexgrad/Kokoro-82M). \ud83c\udd95 \ud83d\udd25 **This implementation also add batch\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/35790",
+    "created_at": "2025-01-20T12:27:19Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/35790/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/35790",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 35790,
+    "review_comments_count": 33,
+    "state": "open",
+    "title": "Add StyleTTS 2",
+    "updated_at": "2026-03-25T03:38:01Z"
+  },
+  {
+    "additions": 5,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? When training llava, it sometimes mixes vision instruction data with text instruction data. However, an error occurs when only text instruction data is input. ```python AttributeError 'NoneType' object has no attrib\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/35526",
+    "created_at": "2025-01-06T07:46:20Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/35526/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/35526",
+    "labels": [],
+    "merged": false,
+    "number": 35526,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Legacy processing is triggered even when only pure text is input in llava without img_token",
+    "updated_at": "2026-02-20T07:32:41Z"
+  },
+  {
+    "additions": 57,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? #35254 I wrote the code with reference to the error message of [mllama](https://github.com/huggingface/transformers/blob/main/src/transformers/models/mllama/processing_mllama.py#L308-L311). ## Before submitting - [\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/35521",
+    "created_at": "2025-01-06T01:47:06Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/35521/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/35521",
+    "labels": [],
+    "merged": false,
+    "number": 35521,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Validate the num imgs and vids tokens",
+    "updated_at": "2026-02-20T07:32:30Z"
+  },
+  {
+    "additions": 1,
+    "author": "canmike",
+    "author_association": "NONE",
+    "body_excerpt": "This PR improves the documentation for class_labels in the Mask2Former model. Clarified the description of class_labels to better explain the shape of the class_labels parameter.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/35514",
+    "created_at": "2025-01-05T13:29:07Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/35514/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/35514",
+    "labels": [],
+    "merged": false,
+    "number": 35514,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "docs: Clarify descriptions for mask_labels in Mask2Former",
+    "updated_at": "2026-03-11T12:19:30Z"
+  },
+  {
+    "additions": 564,
+    "author": "justincharney",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? We implement the [Heavy-Hitter Oracle (H2O)](https://arxiv.org/abs/2306.14048) cache eviction strategy in Huggingface transformers, which selectively retains a balance of KV pairs that are recent or contribute most\u2026",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/35381",
+    "created_at": "2024-12-21T04:38:58Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/35381/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/35381",
+    "labels": [],
+    "merged": false,
+    "number": 35381,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add support for H2O cache eviction with LLaMA",
+    "updated_at": "2026-04-10T09:29:49Z"
+  },
+  {
+    "additions": 7,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.Seq2SeqTrainingArguments <img width=\"944\" alt=\"Screenshot 2024-12-13 at 15 01 59\" src=\"https://github.com/user-attachments/assets/49f12cd\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/35258",
+    "created_at": "2024-12-13T13:39:10Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/35258/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/35258",
+    "labels": [],
+    "merged": true,
+    "number": 35258,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix `Seq2SeqTrainingArguments` documentation",
+    "updated_at": "2026-02-20T13:49:33Z"
+  },
+  {
+    "additions": 20,
+    "author": "Deep-unlearning",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR introduces a caching mechanism for the added_tokens_encoder property in tokenizers to improve performance. Previously, the added_tokens_encoder mapping was recomputed every time the property was accessed, le\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/35218",
+    "created_at": "2024-12-11T18:43:56Z",
+    "deletions": 3,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/35218/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/35218",
+    "labels": [
+      "Core: Tokenization"
+    ],
+    "merged": false,
+    "number": 35218,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "added cached tokenizer",
+    "updated_at": "2026-02-17T09:10:23Z"
+  },
+  {
+    "additions": 43,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? For Falcon 40B in the `transformers` modeling code, the Q, K, and V tensors are fused and stored in an interleaved manner. This means that, for each group, the Q tensors for all heads in the group are stacked togeth\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/35088",
+    "created_at": "2024-12-04T15:49:05Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/35088/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/35088",
+    "labels": [],
+    "merged": false,
+    "number": 35088,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Fix : Falcon processor doesn't account for a layout difference of qkv between transformers and GGUF",
+    "updated_at": "2026-03-06T15:12:29Z"
+  },
+  {
+    "additions": 14,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? In PR #33424, we resolved issue #34447 by adding `num_additional_image_tokens` to the processor. However, the additional tokens are only considered in the processor, and since they are not accounted for in the model\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/35052",
+    "created_at": "2024-12-03T01:12:21Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/35052/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/35052",
+    "labels": [],
+    "merged": true,
+    "number": 35052,
+    "review_comments_count": 11,
+    "state": "closed",
+    "title": "Add: num_additional_image_tokens to models",
+    "updated_at": "2026-02-20T07:32:31Z"
+  },
+  {
+    "additions": 406,
+    "author": "coekfung",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34954",
+    "created_at": "2024-11-27T08:29:53Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34954/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34954",
+    "labels": [
+      "SDPA",
+      "run-slow"
+    ],
+    "merged": false,
+    "number": 34954,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[`ESM`] Add support for sdpa.",
+    "updated_at": "2026-04-11T13:15:28Z"
+  },
+  {
+    "additions": 20,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Enable XPU path in AutoAWQ",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34712",
+    "created_at": "2024-11-13T05:31:21Z",
+    "deletions": 20,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34712/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34712",
+    "labels": [],
+    "merged": true,
+    "number": 34712,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "add xpu path for awq",
+    "updated_at": "2026-03-06T08:15:50Z"
+  },
+  {
+    "additions": 10,
+    "author": "andrewqianpublic",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This update checks for low memory and clears out cache is memory is getting low on the gpu at the end of generation. This should reduce overhead caused by clearing the cache. Fixes # (issue) Clear unused allocated c\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34667",
+    "created_at": "2024-11-09T06:18:31Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34667/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34667",
+    "labels": [],
+    "merged": false,
+    "number": 34667,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Clear unused allocated GPU memory when available GPU memory is low. ",
+    "updated_at": "2026-03-06T07:28:06Z"
+  },
+  {
+    "additions": 403,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Successor of https://github.com/huggingface/transformers/pull/34275. Analogously supports vectorized output from audio LLMs. Currently we have only Qwen2Audio which needs to upload it template on the hub with minor\u2026",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34601",
+    "created_at": "2024-11-04T13:47:33Z",
+    "deletions": 160,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34601/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34601",
+    "labels": [],
+    "merged": true,
+    "number": 34601,
+    "review_comments_count": 12,
+    "state": "closed",
+    "title": "Support `return_tensors` in audio chat templates",
+    "updated_at": "2026-03-06T07:20:54Z"
+  },
+  {
+    "additions": 12,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? When running DeepSpeed ZeRO3 in Bfloat16 environment, weights are not saved during save_steps if `stage3_gather_16bit_weights_on_model_save` is False. However, since there's no error or warning message, it's difficu\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34524",
+    "created_at": "2024-10-31T01:33:09Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34524/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34524",
+    "labels": [],
+    "merged": false,
+    "number": 34524,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add error message for weights not being saved when running ZeRO3 with Bfloat16",
+    "updated_at": "2026-02-20T07:32:38Z"
+  },
+  {
+    "additions": 24,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? While resolving issue #34301, discovered that ViT lacks do_rgb_convert. Therefore, added do_rgb_convert to ViTImageProcessor. ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34523",
+    "created_at": "2024-10-31T01:30:06Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34523/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34523",
+    "labels": [
+      "Vision",
+      "Processing"
+    ],
+    "merged": true,
+    "number": 34523,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Add do_convert_rgb to vit",
+    "updated_at": "2026-02-20T07:32:40Z"
+  },
+  {
+    "additions": 6,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? ```python Image features and image tokens do not match: tokens: 2403, features 2349 File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llava_next/modeling_llava_next.py\", line 921, in forward raise Va\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34522",
+    "created_at": "2024-10-31T01:27:53Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34522/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34522",
+    "labels": [],
+    "merged": true,
+    "number": 34522,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: img size mismatch caused by incorrect unpadding in LLaVA-Next",
+    "updated_at": "2026-02-20T07:32:39Z"
+  },
+  {
+    "additions": 34,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR should fix #33966.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34384",
+    "created_at": "2024-10-24T15:39:49Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34384/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34384",
+    "labels": [],
+    "merged": false,
+    "number": 34384,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix `past_key_values` as input when using `Cache`",
+    "updated_at": "2026-03-16T15:59:33Z"
+  },
+  {
+    "additions": 3,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? ```python # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] if do_convert_rgb: images = [convert_to_rgb(image) for image in images] ``` The convert_to_rgb function is no\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34301",
+    "created_at": "2024-10-22T01:33:27Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34301/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34301",
+    "labels": [],
+    "merged": true,
+    "number": 34301,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: siglip image processor rgb_convert is not being applied correctly.",
+    "updated_at": "2026-02-20T07:32:44Z"
+  },
+  {
+    "additions": 243,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Simplifies and extends the way we load tokenizer files",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34212",
+    "created_at": "2024-10-17T09:21:57Z",
+    "deletions": 139,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34212/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34212",
+    "labels": [],
+    "merged": false,
+    "number": 34212,
+    "review_comments_count": 9,
+    "state": "closed",
+    "title": "Allow for more tokenizer formats",
+    "updated_at": "2026-01-27T10:52:24Z"
+  },
+  {
+    "additions": 325,
+    "author": "muellerzr",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? In conjunction with https://github.com/huggingface/transformers/pull/34191, this PR solves the other half of what's needed: 1. Letting users pass in their own loss functions directly to the Trainer via `compute_loss\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/34198",
+    "created_at": "2024-10-16T14:52:28Z",
+    "deletions": 124,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/34198/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/34198",
+    "labels": [],
+    "merged": true,
+    "number": 34198,
+    "review_comments_count": 20,
+    "state": "closed",
+    "title": " Enable users to use their own loss functions + deal with prefetching for grad accum",
+    "updated_at": "2026-02-23T21:30:13Z"
+  },
+  {
+    "additions": 15,
+    "author": "maxwbuckley",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Improve Chameleon documentation. Especially focused on how the image processing works. Fixes #33647 ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's th\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/33686",
+    "created_at": "2024-09-24T20:28:02Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/33686/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/33686",
+    "labels": [],
+    "merged": false,
+    "number": 33686,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Updating Chameleon Image handling documentation",
+    "updated_at": "2026-04-07T05:09:42Z"
+  },
+  {
+    "additions": 5480,
+    "author": "SangbumChoi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This is the code of compatible for rtdetr-v2. https://github.com/lyuwenyu/RT-DETR/blob/main/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml At this moment I just uploaded rtdetrv2_r18vd for the test,\u2026",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/33244",
+    "created_at": "2024-09-02T05:57:35Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/33244/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/33244",
+    "labels": [
+      "New model",
+      "Vision",
+      "run-slow"
+    ],
+    "merged": false,
+    "number": 33244,
+    "review_comments_count": 20,
+    "state": "closed",
+    "title": "Add rtdetr-v2 version of code",
+    "updated_at": "2026-03-26T15:48:42Z"
+  },
+  {
+    "additions": 1,
+    "author": "dependabot[bot]",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Bumps [torch](https://github.com/pytorch/pytorch) from 1.13.1 to 2.2.0. <details> <summary>Release notes</summary> <p><em>Sourced from <a href=\"https://github.com/pytorch/pytorch/releases\">torch's releases</a>.</em></p> <blockquote> <h2>Py\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/33215",
+    "created_at": "2024-08-30T13:24:23Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/33215/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/33215",
+    "labels": [
+      "dependencies",
+      "python"
+    ],
+    "merged": true,
+    "number": 33215,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Bump torch from 1.13.1 to 2.2.0 in /examples/research_projects/decision_transformer",
+    "updated_at": "2026-03-31T18:03:05Z"
+  },
+  {
+    "additions": 136,
+    "author": "audioXD",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds DINOv2 with registers as proposed in [Vision Transformers Need Registers](https://arxiv.org/abs/2309.16588). My aim was to add registers with minimal code changes in a backward compatible way. My change\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/32127",
+    "created_at": "2024-07-22T01:23:15Z",
+    "deletions": 26,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/32127/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/32127",
+    "labels": [],
+    "merged": false,
+    "number": 32127,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "DINOv2 register support",
+    "updated_at": "2026-02-09T12:08:22Z"
+  },
+  {
+    "additions": 6488,
+    "author": "EduardoPach",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR fixes https://github.com/huggingface/transformers/issues/23240 by adding `ImageBind` model. This is based on https://github.com/huggingface/transformers/pull/26310 which is currently stale and the author sai\u2026",
+    "changed_files": 26,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/30690",
+    "created_at": "2024-05-07T09:35:06Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/30690/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/30690",
+    "labels": [
+      "New model",
+      "Multimodal",
+      "run-slow"
+    ],
+    "merged": false,
+    "number": 30690,
+    "review_comments_count": 233,
+    "state": "open",
+    "title": "Adding imagebind",
+    "updated_at": "2026-04-11T16:19:20Z"
+  },
+  {
+    "additions": 96221,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds RWKV5, superseeds #26963",
+    "changed_files": 40,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 28,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/29095",
+    "created_at": "2024-02-19T02:12:03Z",
+    "deletions": 5,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/29095/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/29095",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 29095,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[`RWKV5`] Add support for RWKV5 model",
+    "updated_at": "2026-01-27T10:51:51Z"
+  },
+  {
+    "additions": 2797,
+    "author": "younesbelkada",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? as discussed offline cc @merveenoyan @NielsRogge This PR adds MobileSam to the library. MobileSam uses the same archtiecture as SAM, with the SAM image encoder being swapped to TinyViT. Therefore I decided to create\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/28261",
+    "created_at": "2023-12-27T12:35:14Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/28261/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/28261",
+    "labels": [
+      "WIP"
+    ],
+    "merged": false,
+    "number": 28261,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`MobileSam`] Adds MobileSAM to transformers",
+    "updated_at": "2026-04-15T06:20:43Z"
+  },
+  {
+    "additions": 2444,
+    "author": "xunkai55",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Add ChatGLM model support in HF Transformers repo. ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [contributor guid\u2026",
+    "changed_files": 23,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/27677",
+    "created_at": "2023-11-23T15:21:26Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/27677/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/27677",
+    "labels": [],
+    "merged": false,
+    "number": 27677,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add ChatGLM model.",
+    "updated_at": "2026-04-14T21:42:20Z"
+  },
+  {
+    "additions": 1075,
+    "author": "Saibo-creator",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds a new feature (Context Free Grammar Constrained Decoding) to the library. There is already one PR(WIP) for this feature( #26520 ), but this one has a different motivation and implementation. This implem\u2026",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/27557",
+    "created_at": "2023-11-17T10:28:37Z",
+    "deletions": 493,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/27557/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/27557",
+    "labels": [
+      "WIP"
+    ],
+    "merged": false,
+    "number": 27557,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Context Free Grammar Constrained Decoding (ebnf interface, compatible with llama-cpp)",
+    "updated_at": "2026-04-15T10:47:30Z"
+  },
+  {
+    "additions": 3071,
+    "author": "helboukkouri",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 69,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/26617",
+    "created_at": "2023-10-05T12:44:55Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/26617/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/26617",
+    "labels": [],
+    "merged": false,
+    "number": 26617,
+    "review_comments_count": 68,
+    "state": "open",
+    "title": "[WIP] Add CharacterBERT model",
+    "updated_at": "2026-04-15T18:48:37Z"
+  },
+  {
+    "additions": 4170,
+    "author": "rafaelpadilla",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds BLIVA to transformers. * Original repo: https://github.com/mlpc-ucsd/BLIVA * Paper: https://arxiv.org/abs/2308.09936 Fixes #26629 - issue with new model request",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/26558",
+    "created_at": "2023-10-03T10:45:50Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/26558/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/26558",
+    "labels": [
+      "New model"
+    ],
+    "merged": false,
+    "number": 26558,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[WIP] Adding BLIVA model",
+    "updated_at": "2026-04-15T10:46:37Z"
+  },
+  {
+    "additions": 3337,
+    "author": "lxchtan",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 31,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/22604",
+    "created_at": "2023-04-06T08:22:22Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/22604/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/22604",
+    "labels": [],
+    "merged": false,
+    "number": 22604,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "[WIP] Add PoNet",
+    "updated_at": "2026-02-15T15:54:57Z"
+  },
+  {
+    "additions": 60,
+    "author": "gante",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Related to #21761 Problem: In some functions, we detect the framework of the model class through its name (e.g. if it starts with `TF`). This is a quirk of our library, and users might run into issues due to this hi\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/21784",
+    "created_at": "2023-02-24T12:09:33Z",
+    "deletions": 35,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/21784/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/21784",
+    "labels": [],
+    "merged": true,
+    "number": 21784,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Inheritance-based framework detection",
+    "updated_at": "2026-04-14T21:25:59Z"
+  },
+  {
+    "additions": 39,
+    "author": "stas00",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR was prompted by [this discussion](https://github.com/pytorch/torchdistx/pull/52#discussion_r1082027732) with @lessw2020. The PR works, just keeping it as Draft for now as I haven't polished it to be ready for merging. # How to perf\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/21312",
+    "created_at": "2023-01-26T05:31:25Z",
+    "deletions": 5,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/21312/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/21312",
+    "labels": [
+      "WIP"
+    ],
+    "merged": false,
+    "number": 21312,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[pure bf16 training] w/ `AnyPrecisionAdamW` and Kahan summation",
+    "updated_at": "2026-04-14T21:25:43Z"
+  },
+  {
+    "additions": 158,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR is a follow-up of #19341, to make sure weights are properly initialized when training vision models from scratch.",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/19449",
+    "created_at": "2022-10-10T09:47:04Z",
+    "deletions": 79,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/19449/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/19449",
+    "labels": [
+      "WIP"
+    ],
+    "merged": false,
+    "number": 19449,
+    "review_comments_count": 9,
+    "state": "closed",
+    "title": "[WIP] Fix weights initialization of several vision models",
+    "updated_at": "2026-04-15T06:20:12Z"
+  },
+  {
+    "additions": 793,
+    "author": "flozi00",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "another continue of https://github.com/huggingface/transformers/issues/18564 @sgugger",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/19171",
+    "created_at": "2022-09-23T15:05:44Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/19171/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/19171",
+    "labels": [],
+    "merged": true,
+    "number": 19171,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "german training, accelerate and model sharing",
+    "updated_at": "2026-04-14T21:33:08Z"
+  },
+  {
+    "additions": 1795,
+    "author": "patil-suraj",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds the VQGAN model, first step for adding the Dallemega model in transformers. - This model is different from most the models available in `Transformers`, it's an U-Net like encoder-decoder architecture with vecto\u2026",
+    "changed_files": 20,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/18150",
+    "created_at": "2022-07-15T14:42:47Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/18150/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/18150",
+    "labels": [
+      "WIP"
+    ],
+    "merged": false,
+    "number": 18150,
+    "review_comments_count": 15,
+    "state": "closed",
+    "title": "Add VQGAN",
+    "updated_at": "2026-04-15T10:46:57Z"
+  },
+  {
+    "additions": 10,
+    "author": "stas00",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "An attempt to fix the issue reported https://github.com/huggingface/transformers/issues/17336 Fixes: https://github.com/huggingface/transformers/issues/17336",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/17373",
+    "created_at": "2022-05-20T21:00:58Z",
+    "deletions": 7,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/17373/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/17373",
+    "labels": [
+      "WIP"
+    ],
+    "merged": false,
+    "number": 17373,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[WIP] [deepspeed] from_pretrained deal with ignore_mismatched_sizes",
+    "updated_at": "2026-04-14T21:33:35Z"
+  },
+  {
+    "additions": 41,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR improves SegFormer by removing the `reshape_last_stage` attribute of the configuration. In fact, this attribute was not needed at all. Previously, the `reshape_last_stage` argument was set to `False` for `Se\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/15748",
+    "created_at": "2022-02-21T10:02:38Z",
+    "deletions": 26,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/15748/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/15748",
+    "labels": [
+      "WIP"
+    ],
+    "merged": false,
+    "number": 15748,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix segformer reshape last stage",
+    "updated_at": "2026-04-15T06:19:56Z"
+  },
+  {
+    "additions": 33,
+    "author": "stas00",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "documenting faster / leaner optimizers TODO: - add `--optim adamw_bnb_8bit` for HF Trainer.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/14708",
+    "created_at": "2021-12-09T18:49:27Z",
+    "deletions": 10,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/14708/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/14708",
+    "labels": [
+      "WIP"
+    ],
+    "merged": false,
+    "number": 14708,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[WIP] [performance doc] faster/leaner optimizers",
+    "updated_at": "2026-03-24T04:00:09Z"
+  },
+  {
+    "additions": 77,
+    "author": "stas00",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR is working on an integration of [Deepspeed Inference](https://www.deepspeed.ai/tutorials/inference-tutorial/) which implements Tensor Parallelism. This is different from Deepspeed ZeRO inference. This is a very early draft. To try:\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/14426",
+    "created_at": "2021-11-17T01:57:38Z",
+    "deletions": 3,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/14426/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/14426",
+    "labels": [
+      "WIP",
+      "Inference"
+    ],
+    "merged": false,
+    "number": 14426,
+    "review_comments_count": 8,
+    "state": "closed",
+    "title": "[Deepspeed Inference] HF Integration",
+    "updated_at": "2026-03-23T12:33:37Z"
+  },
+  {
+    "additions": 18,
+    "author": "zrxbeijing",
+    "author_association": "NONE",
+    "body_excerpt": "The preprocessing of glue datasets is too slow. This change enables multiprocessing to speed up the process of converting examples to features by utilizing multiple cpu cores.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/4821",
+    "created_at": "2020-06-06T21:16:43Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/4821/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/4821",
+    "labels": [],
+    "merged": false,
+    "number": 4821,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Enable multiprocessing in glue datasets",
+    "updated_at": "2026-03-26T12:07:58Z"
+  },
+  {
+    "additions": 84,
+    "author": "DomHudson",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary I replace the code that makes the position ids with logic closer to the original fairseq `make_positions` function. It wasn't clear to me what to do in the event that the embeddings are passed in directly through `inputs_embeds`\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/1764",
+    "created_at": "2019-11-07T18:05:01Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/1764/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/1764",
+    "labels": [],
+    "merged": true,
+    "number": 1764,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Bug-fix: Roberta Embeddings Not Masked",
+    "updated_at": "2026-02-28T00:55:19Z"
+  },
+  {
+    "additions": 2,
+    "author": "davidefiocco",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "I tried to address https://github.com/huggingface/pytorch-pretrained-BERT/issues/76 should be correct, but there's likely a more efficient way.",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/77",
+    "created_at": "2018-12-02T11:38:51Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/77/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/77",
+    "labels": [],
+    "merged": true,
+    "number": 77,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Correct assignment for logits in classifier example",
+    "updated_at": "2026-03-03T00:22:35Z"
   }
 ]