diff --git "a/data/prs.json" "b/data/prs.json" --- "a/data/prs.json" +++ "b/data/prs.json" @@ -25364,5 +25364,20522 @@ "state": "open", "title": "Refactored vits to match standardized output collection interface", "updated_at": "2026-02-19T12:18:56Z" + }, + { + "additions": 79, + "author": "IlyasMoutawwakil", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026", + "changed_files": 3, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 5, + "conversation_url": "https://github.com/huggingface/transformers/pull/44127", + "created_at": "2026-02-18T10:41:48Z", + "deletions": 8, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44127/files", + "html_url": "https://github.com/huggingface/transformers/pull/44127", + "labels": [], + "merged": true, + "number": 44127, + "review_comments_count": 0, + "state": "closed", + "title": "AutoTokenizer ignores config when model_type is None", + "updated_at": "2026-02-18T14:47:52Z" + }, + { + "additions": 17, + "author": "Cyrilvallez", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? As per the title. Let's simplify after https://github.com/huggingface/transformers/pull/42848", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44126", + "created_at": "2026-02-18T09:58:49Z", + "deletions": 40, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44126/files", + "html_url": "https://github.com/huggingface/transformers/pull/44126", + "labels": [], + "merged": true, + "number": 44126, + "review_comments_count": 0, + "state": "closed", + "title": "Simplify input preparation in generate", + "updated_at": "2026-02-18T10:30:48Z" + }, + { + "additions": 8, + "author": "zucchini-nlp", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43986", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44125", + "created_at": "2026-02-18T09:34:54Z", + "deletions": 7, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44125/files", + "html_url": "https://github.com/huggingface/transformers/pull/44125", + "labels": [], + "merged": true, + "number": 44125, + "review_comments_count": 2, + "state": "closed", + "title": "Raise informative error when loading video processors", + "updated_at": "2026-02-20T08:23:35Z" + }, + { + "additions": 10, + "author": "mariam851", + "author_association": "CONTRIBUTOR", + "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to force evaluation at the end of training, even if the last step doesn't align with eval_steps. Changes: training_args.py: Added eval_on_end field. trainer.py: Added logic to call evaluat\u2026", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44124", + "created_at": "2026-02-18T08:52:23Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44124/files", + "html_url": "https://github.com/huggingface/transformers/pull/44124", + "labels": [], + "merged": false, + "number": 44124, + "review_comments_count": 0, + "state": "closed", + "title": "feat: add eval_on_end to Trainer for final evaluation", + "updated_at": "2026-02-18T14:14:16Z" + }, + { + "additions": 33, + "author": "cyyever", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? This PR avoids device sync in training loss accumulation by ```torch.where```. The `is_torch_xla_available` condition is also removed.", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44123", + "created_at": "2026-02-18T08:22:57Z", + "deletions": 22, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44123/files", + "html_url": "https://github.com/huggingface/transformers/pull/44123", + "labels": [], + "merged": false, + "number": 44123, + "review_comments_count": 0, + "state": "open", + "title": "Avoid device sync in training loss accumulation", + "updated_at": "2026-03-30T07:57:16Z" + }, + { + "additions": 158, + "author": "adityuhkapoor", + "author_association": "NONE", + "body_excerpt": "# What does this PR do? Adds 4-bit embedding quantization for BitsAndBytes, mirroring TorchAO's existing `include_input_output_embeddings` and `untie_embedding_weights` pattern (PRs #37802, #37905, #37935). Large-vocabulary models (Llama 3\u2026", + "changed_files": 4, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44122", + "created_at": "2026-02-18T06:35:09Z", + "deletions": 2, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44122/files", + "html_url": "https://github.com/huggingface/transformers/pull/44122", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44122, + "review_comments_count": 0, + "state": "closed", + "title": "Add BnB 4-bit embedding quantization support", + "updated_at": "2026-02-18T14:27:25Z" + }, + { + "additions": 14, + "author": "tirth8205", + "author_association": "NONE", + "body_excerpt": "Fixes #34920 After applying `normalize()`, images can have negative values. Calling `resize()` on such images fails because it internally converts to PIL, which requires values in [0, 1] or [0, 255]. ### Fix When the image has values outsi\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44120", + "created_at": "2026-02-17T23:56:48Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44120/files", + "html_url": "https://github.com/huggingface/transformers/pull/44120", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44120, + "review_comments_count": 0, + "state": "closed", + "title": "fix: allow image_transforms.resize to handle negative values after normalization", + "updated_at": "2026-02-18T14:08:54Z" + }, + { + "additions": 1, + "author": "tirth8205", + "author_association": "NONE", + "body_excerpt": "Fixes #44117 `TOKENIZER_MAPPING_NAMES.get(config_model_type, \"\")` returns `None` when the key exists with value `None`, causing `AttributeError: 'NoneType' object has no attribute 'replace'` when loading models like `google/siglip2-so400m-\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/44119", + "created_at": "2026-02-17T23:53:20Z", + "deletions": 1, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44119/files", + "html_url": "https://github.com/huggingface/transformers/pull/44119", + "labels": [], + "merged": false, + "number": 44119, + "review_comments_count": 0, + "state": "closed", + "title": "fix: handle None value from TOKENIZER_MAPPING_NAMES.get() in AutoTokenizer", + "updated_at": "2026-02-18T14:04:47Z" + }, + { + "additions": 32, + "author": "tirth8205", + "author_association": "NONE", + "body_excerpt": "## Fix Fixes #44079 When a `ModelOutput` dataclass field is initialized as `None`, it is correctly excluded from the OrderedDict keys. However, **subsequently setting that field to a non-None value** via attribute assignment (e.g. `outputs\u2026", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 0, + "conversation_url": "https://github.com/huggingface/transformers/pull/44118", + "created_at": "2026-02-17T23:31:31Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44118/files", + "html_url": "https://github.com/huggingface/transformers/pull/44118", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 44118, + "review_comments_count": 0, + "state": "closed", + "title": "fix: ModelOutput keys not updated when setting previously-None dataclass fields", + "updated_at": "2026-02-18T14:18:12Z" + }, + { + "additions": 27, + "author": "dtiourine", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "Migrate Flaubert to the @capture_outputs and @can_return_tuple decorator pattern for output handling, as part of #43979. # What does this PR do? - Add `_can_record_outputs = {\"attentions\": MultiHeadAttention}` on `FlaubertPreTrainedModel`\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44116", + "created_at": "2026-02-17T21:52:13Z", + "deletions": 102, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44116/files", + "html_url": "https://github.com/huggingface/transformers/pull/44116", + "labels": [], + "merged": false, + "number": 44116, + "review_comments_count": 0, + "state": "open", + "title": "[WIP] [Flaubert] Refactor output tracing to decorator-based interface", + "updated_at": "2026-02-17T21:53:23Z" + }, + { + "additions": 2, + "author": "Deep-unlearning", + "author_association": "MEMBER", + "body_excerpt": "## Summary - Fix broken `[chat template](./chat_templating)` links in `docs/source/en/tasks/` - `./chat_templating` resolves within `tasks/` (doesn't exist); corrected to `../chat_templating` - Affected files: `tasks/image_text_to_text.md`\u2026", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44115", + "created_at": "2026-02-17T21:32:55Z", + "deletions": 2, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44115/files", + "html_url": "https://github.com/huggingface/transformers/pull/44115", + "labels": [], + "merged": true, + "number": 44115, + "review_comments_count": 0, + "state": "closed", + "title": "[docs] fix broken chat_templating links in tasks docs", + "updated_at": "2026-02-23T16:27:57Z" + }, + { + "additions": 716, + "author": "23atharvaS", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## Summary This PR migrates the `wav2vec2` family to the standardized output-capturing interface (`@capture_outputs` + `@can_return_tuple`) and includes follow-up compatibility fixes required to make full CI green. ## What changed ### Core\u2026", + "changed_files": 19, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44114", + "created_at": "2026-02-17T21:17:35Z", + "deletions": 1237, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44114/files", + "html_url": "https://github.com/huggingface/transformers/pull/44114", + "labels": [], + "merged": false, + "number": 44114, + "review_comments_count": 0, + "state": "open", + "title": "Migrate wav2vec2, wav2vec2_conformer, and wav2vec2_bert to standardized output collection decorators", + "updated_at": "2026-02-18T20:34:53Z" + }, + { + "additions": 5, + "author": "harshaljanjani", + "author_association": "CONTRIBUTOR", + "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Updates the stale `test_device_override` in `test_processing_granite_speech.py` to verify that the device param controls where speech inputs are placed, r\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44113", + "created_at": "2026-02-17T20:01:32Z", + "deletions": 7, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44113/files", + "html_url": "https://github.com/huggingface/transformers/pull/44113", + "labels": [], + "merged": true, + "number": 44113, + "review_comments_count": 2, + "state": "closed", + "title": "fix(testing): Update stale device override test in GraniteSpeech", + "updated_at": "2026-04-18T08:32:21Z" + }, + { + "additions": 30, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `PoolFormerLayer` to return a single tensor instead of a 1-tuple - Simplifies `\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/44111", + "created_at": "2026-02-17T19:38:02Z", + "deletions": 59, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44111/files", + "html_url": "https://github.com/huggingface/transformers/pull/44111", + "labels": [], + "merged": false, + "number": 44111, + "review_comments_count": 0, + "state": "closed", + "title": "refactor(poolformer): use capture_outputs for output tracing", + "updated_at": "2026-02-18T21:19:22Z" + }, + { + "additions": 28, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `TvpAttention` to always return `(output, attention_probs)` (hooks decide what to capt\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44110", + "created_at": "2026-02-17T19:32:55Z", + "deletions": 101, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44110/files", + "html_url": "https://github.com/huggingface/transformers/pull/44110", + "labels": [], + "merged": false, + "number": 44110, + "review_comments_count": 0, + "state": "closed", + "title": "refactor(tvp): use capture_outputs for output tracing", + "updated_at": "2026-02-18T21:19:24Z" + }, + { + "additions": 48, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `HGNetV2Encoder` by removing `return_dict` parameter (always returns `BaseModelOutputWithNoAttention`)\u2026", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44109", + "created_at": "2026-02-17T19:23:03Z", + "deletions": 87, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44109/files", + "html_url": "https://github.com/huggingface/transformers/pull/44109", + "labels": [], + "merged": false, + "number": 44109, + "review_comments_count": 0, + "state": "closed", + "title": "refactor(hgnet_v2): use capture_outputs for output tracing", + "updated_at": "2026-02-18T21:19:25Z" + }, + { + "additions": 33, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_attentions`/`return_dict` resolution - Adds `_can_record_outputs = {\"attentions\": VitDetAttention}`\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44108", + "created_at": "2026-02-17T19:15:00Z", + "deletions": 82, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44108/files", + "html_url": "https://github.com/huggingface/transformers/pull/44108", + "labels": [], + "merged": false, + "number": 44108, + "review_comments_count": 0, + "state": "closed", + "title": "refactor(vitdet): use output tracing decorators", + "updated_at": "2026-02-18T21:19:27Z" + }, + { + "additions": 40, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture_outputs` decorators - Simplifies `MraEncoder` to a plain loop returning a single tensor, removing `\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44107", + "created_at": "2026-02-17T19:04:42Z", + "deletions": 112, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44107/files", + "html_url": "https://github.com/huggingface/transformers/pull/44107", + "labels": [], + "merged": false, + "number": 44107, + "review_comments_count": 0, + "state": "closed", + "title": "refactor(mra): use output tracing decorators", + "updated_at": "2026-02-18T21:19:29Z" + }, + { + "additions": 47, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 5 wrapper model classes, eliminating manual `return_dict` handlin\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44106", + "created_at": "2026-02-17T18:59:25Z", + "deletions": 132, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44106/files", + "html_url": "https://github.com/huggingface/transformers/pull/44106", + "labels": [], + "merged": false, + "number": 44106, + "review_comments_count": 0, + "state": "closed", + "title": "Refactor yoso to use automatic output tracing", + "updated_at": "2026-02-18T21:19:30Z" + }, + { + "additions": 39, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 3 wrapper model classes, eliminating manual `return_dict` handlin\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44105", + "created_at": "2026-02-17T18:54:40Z", + "deletions": 127, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44105/files", + "html_url": "https://github.com/huggingface/transformers/pull/44105", + "labels": [], + "merged": false, + "number": 44105, + "review_comments_count": 0, + "state": "closed", + "title": "Refactor lilt to use automatic output tracing", + "updated_at": "2026-02-18T21:19:32Z" + }, + { + "additions": 66, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 8 wrapper model classes, eliminating m\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44104", + "created_at": "2026-02-17T18:43:44Z", + "deletions": 207, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44104/files", + "html_url": "https://github.com/huggingface/transformers/pull/44104", + "labels": [], + "merged": false, + "number": 44104, + "review_comments_count": 0, + "state": "closed", + "title": "Refactor megatron_bert to use automatic output tracing", + "updated_at": "2026-02-18T21:19:34Z" + }, + { + "additions": 53, + "author": "engmohamedsalah", + "author_association": "NONE", + "body_excerpt": "Fixes #44052 Now and then, the indexer ran into trouble switching between masks and cache. Most of the test failures came from these hiccups: - Indexer cache: the old if seq_len > 1: reset cache heuristic broke assisted decoding (multi-tok\u2026", + "changed_files": 3, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44103", + "created_at": "2026-02-17T18:04:48Z", + "deletions": 76, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44103/files", + "html_url": "https://github.com/huggingface/transformers/pull/44103", + "labels": [], + "merged": false, + "number": 44103, + "review_comments_count": 0, + "state": "closed", + "title": "Fix glm_moe_dsa", + "updated_at": "2026-02-18T19:38:11Z" + }, + { + "additions": 42, + "author": "fumadari", + "author_association": "NONE", + "body_excerpt": "## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of the meta-issue #43979. **Key changes:** - Added `_can_record_outputs = {\"hidden_states\": IBertLayer,\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44102", + "created_at": "2026-02-17T17:21:32Z", + "deletions": 154, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44102/files", + "html_url": "https://github.com/huggingface/transformers/pull/44102", + "labels": [], + "merged": false, + "number": 44102, + "review_comments_count": 0, + "state": "closed", + "title": "Refactor ibert output tracing with capture_outputs", + "updated_at": "2026-02-18T21:19:35Z" + }, + { + "additions": 210, + "author": "aman-coder03", + "author_association": "FIRST_TIME_CONTRIBUTOR", + "body_excerpt": "## What does this PR do? This PR refactors XLM's output tracing to align with the standardized output capturing patterns used across the codebase. ### Key changes: - Refactors transformer blocks into a dedicated `XLMLayer` module to enable\u2026", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/44101", + "created_at": "2026-02-17T17:15:06Z", + "deletions": 194, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44101/files", + "html_url": "https://github.com/huggingface/transformers/pull/44101", + "labels": [], + "merged": false, + "number": 44101, + "review_comments_count": 0, + "state": "open", + "title": "[XLM] Refactor output tracing to align with capture_outputs standardized architecture", + "updated_at": "2026-02-19T08:08:33Z" + }, + { + "additions": 3, + "author": "qgallouedec", + "author_association": "MEMBER", + "body_excerpt": "In https://github.com/huggingface/trl/pull/5112 a user reported that `trl sft --help` fails It's because three inherited args from `TrainingArguments` (`torch_empty_cache_steps`, `gradient_checkpointing` and `use_liger_kernel`)help strings\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/44100", + "created_at": "2026-02-17T17:10:36Z", + "deletions": 3, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/44100/files", + "html_url": "https://github.com/huggingface/transformers/pull/44100", + "labels": [], + "merged": true, + "number": 44100, + "review_comments_count": 0, + "state": "closed", + "title": "Fix percentage formatting in help messages for gradient checkpointing, Liger Kernel, and empty cache steps", + "updated_at": "2026-02-20T09:57:51Z" + }, + { + "additions": 2, + "author": "qgallouedec", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? ## Related Issue Fixes #40170 **Issue:** Add MXFP4 MoE/attention backward kernels **URL:** https://github.com/huggingface/transformers/issues/40170 ## Problem ## A Call To Action! The Hugg\u2026", + "changed_files": 6, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 7, + "conversation_url": "https://github.com/huggingface/transformers/pull/43771", + "created_at": "2026-02-05T15:12:21Z", + "deletions": 4, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43771/files", + "html_url": "https://github.com/huggingface/transformers/pull/43771", + "labels": [ + "Code agent slop" + ], + "merged": false, + "number": 43771, + "review_comments_count": 0, + "state": "closed", + "title": "fix: Add MXFP4 MoE/attention backward kernels", + "updated_at": "2026-03-24T14:14:44Z" + }, + { + "additions": 47, + "author": "lordaarush", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? Removes the unconditional `self.state.train_batch_size = self._train_batch_size` assignment that was causing issues when resuming from checkpoint with different batch configurations. The `train_batch_size` should on\u2026", + "changed_files": 2, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 7, + "conversation_url": "https://github.com/huggingface/transformers/pull/43770", + "created_at": "2026-02-05T14:25:36Z", + "deletions": 1, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43770/files", + "html_url": "https://github.com/huggingface/transformers/pull/43770", + "labels": [], + "merged": true, + "number": 43770, + "review_comments_count": 0, + "state": "closed", + "title": "Remove unconditional train_batch_size assignment", + "updated_at": "2026-02-06T14:47:16Z" + }, + { + "additions": 3950, + "author": "eustlb", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? Adds voxtral realtime! ## benchmarks Using [this reproducer](https://gist.github.com/eustlb/367f062f77a5971291fb5350763bea8d), I've ran WER evals on ami, librispeech and fleurs, with results Dataset | Original (vllm\u2026", + "changed_files": 21, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/43769", + "created_at": "2026-02-05T14:17:52Z", + "deletions": 2, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43769/files", + "html_url": "https://github.com/huggingface/transformers/pull/43769", + "labels": [ + "New model", + "Audio" + ], + "merged": true, + "number": 43769, + "review_comments_count": 39, + "state": "closed", + "title": "Add Voxtral Realtime", + "updated_at": "2026-02-26T10:18:32Z" + }, + { + "additions": 87, + "author": "zucchini-nlp", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? Helps vLLM to bump to v5", + "changed_files": 6, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 5, + "conversation_url": "https://github.com/huggingface/transformers/pull/43768", + "created_at": "2026-02-05T14:04:02Z", + "deletions": 5, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43768/files", + "html_url": "https://github.com/huggingface/transformers/pull/43768", + "labels": [], + "merged": true, + "number": 43768, + "review_comments_count": 10, + "state": "closed", + "title": "Fix init weights in remote code", + "updated_at": "2026-02-17T14:45:18Z" + }, + { + "additions": 850, + "author": "XingweiDeng", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? src/transformers/utils/import_utils.py:2317:16\u2026", + "changed_files": 0, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/43709", + "created_at": "2026-02-03T14:26:58Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43709/files", + "html_url": "https://github.com/huggingface/transformers/pull/43709", + "labels": [], + "merged": true, + "number": 43709, + "review_comments_count": 0, + "state": "closed", + "title": "fix: `VersionComparison.from_string` return type mismatch", + "updated_at": "2026-02-23T19:05:33Z" + }, + { + "additions": 2202, + "author": "liu-jiaxuan", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingfa\u2026", + "changed_files": 16, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 11, + "conversation_url": "https://github.com/huggingface/transformers/pull/43707", + "created_at": "2026-02-03T13:33:41Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43707/files", + "html_url": "https://github.com/huggingface/transformers/pull/43707", + "labels": [ + "New model" + ], + "merged": true, + "number": 43707, + "review_comments_count": 145, + "state": "closed", + "title": "[Model] Add SLANeXt Model Support", + "updated_at": "2026-03-20T17:24:22Z" + }, + { + "additions": 42, + "author": "vasqu", + "author_association": "MEMBER", + "body_excerpt": "As per title, the new way to call the attention interface has slipped through a refactor because it's too new and not too well known atm cc @yonigozlan", + "changed_files": 9, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/43706", + "created_at": "2026-02-03T11:57:22Z", + "deletions": 48, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43706/files", + "html_url": "https://github.com/huggingface/transformers/pull/43706", + "labels": [], + "merged": true, + "number": 43706, + "review_comments_count": 2, + "state": "closed", + "title": "[`Attn`] Fixup interface usage after refactor", + "updated_at": "2026-02-03T14:56:35Z" + }, + { + "additions": 120, + "author": "Cyrilvallez", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? Allow the `is_causal` kwarg and config attribute to make well-behaved decoder-only models act as encoders", + "changed_files": 3, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 1, + "conversation_url": "https://github.com/huggingface/transformers/pull/43705", + "created_at": "2026-02-03T11:45:43Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43705/files", + "html_url": "https://github.com/huggingface/transformers/pull/43705", + "labels": [], + "merged": true, + "number": 43705, + "review_comments_count": 11, + "state": "closed", + "title": "Allow bi-directional attention for all models", + "updated_at": "2026-02-04T17:24:32Z" + }, + { + "additions": 1, + "author": "francesco-bertolotti", + "author_association": "CONTRIBUTOR", + "body_excerpt": "wrong `rms_norm_type` # What does this PR do? Small type error in the configuration of qwen3. `rms_norm_eps` should be a float and not an int. ## Before submitting - [ X] This PR fixes a typo or improves the docs (you can dismiss the other\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/43703", + "created_at": "2026-02-03T10:05:17Z", + "deletions": 1, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43703/files", + "html_url": "https://github.com/huggingface/transformers/pull/43703", + "labels": [], + "merged": true, + "number": 43703, + "review_comments_count": 0, + "state": "closed", + "title": "Update configuration_qwen3.py", + "updated_at": "2026-02-04T07:03:04Z" + }, + { + "additions": 2828, + "author": "eustlb", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? Adds[ UsefulSensors'](https://huggingface.co/UsefulSensors) new ASR model.", + "changed_files": 19, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/43702", + "created_at": "2026-02-03T09:32:42Z", + "deletions": 247, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43702/files", + "html_url": "https://github.com/huggingface/transformers/pull/43702", + "labels": [ + "New model" + ], + "merged": true, + "number": 43702, + "review_comments_count": 30, + "state": "closed", + "title": "Add moonshine streaming", + "updated_at": "2026-02-12T10:10:16Z" + }, + { + "additions": 1, + "author": "YangKai0616", + "author_association": "CONTRIBUTOR", + "body_excerpt": "Here pytorch has a mature mechanism to auto select the right backend for different devices. @ydshieh pls help review, thx!", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 6, + "conversation_url": "https://github.com/huggingface/transformers/pull/43699", + "created_at": "2026-02-03T07:33:04Z", + "deletions": 1, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43699/files", + "html_url": "https://github.com/huggingface/transformers/pull/43699", + "labels": [], + "merged": false, + "number": 43699, + "review_comments_count": 3, + "state": "closed", + "title": "avoid using specified backend for tp tests", + "updated_at": "2026-03-09T08:17:48Z" + }, + { + "additions": 1, + "author": "sywangyi", + "author_association": "CONTRIBUTOR", + "body_excerpt": "- model loading (from pretrained, etc): @CyrilVallez - distributed: @3outeille @ArthurZucker fix tp crash. crash stack is [rank0]: Traceback (most recent call last): [rank0]: File \"/transformers/benchmark_v2/test_tp.py\", line 29, in - Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 ```bash input = { \"messages\": [ { \"role\": \"user\", \"content\": [ { \"type\": \"text\", \"text\": \"The history of France is \", } ], }, ], } I have a question about th\u2026", + "changed_files": 1, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/43670", + "created_at": "2026-02-02T02:06:14Z", + "deletions": 1, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43670/files", + "html_url": "https://github.com/huggingface/transformers/pull/43670", + "labels": [], + "merged": true, + "number": 43670, + "review_comments_count": 0, + "state": "closed", + "title": "Fix FP8Expert for Qwen", + "updated_at": "2026-02-02T15:18:49Z" + }, + { + "additions": 2, + "author": "fschlatt", + "author_association": "CONTRIBUTOR", + "body_excerpt": "# What does this PR do? makes the whole mixin behave like a static holder for methods... - Modify methods/inherited cl\u2026", + "changed_files": 137, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/43620", + "created_at": "2026-01-30T11:24:09Z", + "deletions": 288, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43620/files", + "html_url": "https://github.com/huggingface/transformers/pull/43620", + "labels": [], + "merged": true, + "number": 43620, + "review_comments_count": 0, + "state": "closed", + "title": "[`Rope`] Revert #43410 and make inheritance implicit again", + "updated_at": "2026-01-30T18:44:16Z" + }, + { + "additions": 40, + "author": "zucchini-nlp", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do? As per title, some models add or delete entries in tied weights depending on configuration. If we load two models consecutively with different configs, it fails to tie weights correctly I am copying it in `__init__`\u2026", + "changed_files": 4, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/43619", + "created_at": "2026-01-30T10:43:38Z", + "deletions": 6, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43619/files", + "html_url": "https://github.com/huggingface/transformers/pull/43619", + "labels": [ + "for patch" + ], + "merged": true, + "number": 43619, + "review_comments_count": 8, + "state": "closed", + "title": "Don't modify `tied_weight_keys` in-place", + "updated_at": "2026-01-30T15:46:02Z" + }, + { + "additions": 17, + "author": "kaixuanliu", + "author_association": "CONTRIBUTOR", + "body_excerpt": "@zucchini-nlp pls help review, thx! We have to add back the changes in https://github.com/huggingface/transformers/pull/42523. As for llava_onevision model, in its checkpoint config file, the model's `tie_word_embeddings` is Flase, and mod\u2026", + "changed_files": 3, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 3, + "conversation_url": "https://github.com/huggingface/transformers/pull/43617", + "created_at": "2026-01-30T10:21:45Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43617/files", + "html_url": "https://github.com/huggingface/transformers/pull/43617", + "labels": [], + "merged": false, + "number": 43617, + "review_comments_count": 0, + "state": "closed", + "title": "Fix tie_word_embedding issue for llava_onevision model", + "updated_at": "2026-04-13T02:41:01Z" + }, + { + "additions": 3, + "author": "yiliu30", + "author_association": "CONTRIBUTOR", + "body_excerpt": "Signed-off-by: yiliu30 # What does this PR do? ## Related Issue Fixes #43408 **Issue:** Warning: You are using a model of type sam3_video to instantiate a model of type sam3_tracker **URL:** https://github.com/huggingface/transformers/\u2026", + "changed_files": 8, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 12, + "conversation_url": "https://github.com/huggingface/transformers/pull/43495", + "created_at": "2026-01-26T12:46:21Z", + "deletions": 7, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43495/files", + "html_url": "https://github.com/huggingface/transformers/pull/43495", + "labels": [], + "merged": true, + "number": 43495, + "review_comments_count": 4, + "state": "closed", + "title": "fix: add compatible_model_types to suppress model type mismatch warnings", + "updated_at": "2026-02-05T13:31:24Z" + }, + { + "additions": 20, + "author": "githubnemo", + "author_association": "MEMBER", + "body_excerpt": "The Qwen3 MoE config was missing the mapping attribute for the num_expert_local config variable which made it impossible to load FP8 quantized models, due to the following exception: ``` Traceback (most recent call last): File \".../exps/tr\u2026", + "changed_files": 3, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 4, + "conversation_url": "https://github.com/huggingface/transformers/pull/43494", + "created_at": "2026-01-26T11:34:05Z", + "deletions": 0, + "draft": false, + "files_url": "https://github.com/huggingface/transformers/pull/43494/files", + "html_url": "https://github.com/huggingface/transformers/pull/43494", + "labels": [], + "merged": true, + "number": 43494, + "review_comments_count": 1, + "state": "closed", + "title": "Fix loading of Qwen3 FP8", + "updated_at": "2026-01-27T09:56:23Z" + }, + { + "additions": 54, + "author": "eustlb", + "author_association": "MEMBER", + "body_excerpt": "# What does this PR do?", + "changed_files": 5, + "cluster_id": null, + "cluster_ids": [], + "cluster_role": null, + "comments_count": 2, + "conversation_url": "https://github.com/huggingface/transformers/pull/43492", + "created_at": "2026-01-26T10:30:53Z", + "deletions": 1, + "draft": true, + "files_url": "https://github.com/huggingface/transformers/pull/43492/files", + "html_url": "https://github.com/huggingface/transformers/pull/43492", + "labels": [], + "merged": false, + "number": 43492, + "review_comments_count": 0, + "state": "open", + "title": "Perception Encoder follow up PR", + "updated_at": "2026-01-26T12:55:35Z" + }, + { + "additions": 605, + "author": "tarekziade", + "author_association": "MEMBER", + "body_excerpt": "DRAFT FOR DISCUSSION # What does this PR do?