Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Model debugging toolboxes","local":"model-debugging-toolboxes","sections":[{"title":"Model addition debuggers","local":"model-addition-debuggers","sections":[{"title":"Model addition debugger - context manager for model adders","local":"model-addition-debugger---context-manager-for-model-adders","sections":[],"depth":3},{"title":"Rationale","local":"rationale","sections":[],"depth":3},{"title":"Usage","local":"usage","sections":[],"depth":3},{"title":"Reading results","local":"reading-results","sections":[{"title":"Saving tensors to disk","local":"saving-tensors-to-disk","sections":[],"depth":4}],"depth":3},{"title":"Comparing between implementations","local":"comparing-between-implementations","sections":[],"depth":3},{"title":"Limitations and scope","local":"transformers.model_addition_debugger_context","sections":[],"depth":3}],"depth":2}],"depth":1}"> | |
| <link href="/docs/transformers/pr_33892/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/entry/start.b2c4257a.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/scheduler.31fdf58d.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/singletons.9860629f.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/index.252883d5.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/paths.e85c0ec8.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/entry/app.05ef1f97.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/preload-helper.40847a0e.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/index.2f76fdf0.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/nodes/0.ca4aafa4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/nodes/40.0838d680.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/CopyLLMTxtMenu.ff482081.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.71f274cc.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/IconCopy.ac192424.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/Docstring.2ffe4171.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/CodeBlock.ab12f8e1.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/ExampleCodeBlock.1360320a.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Model debugging toolboxes","local":"model-debugging-toolboxes","sections":[{"title":"Model addition debuggers","local":"model-addition-debuggers","sections":[{"title":"Model addition debugger - context manager for model adders","local":"model-addition-debugger---context-manager-for-model-adders","sections":[],"depth":3},{"title":"Rationale","local":"rationale","sections":[],"depth":3},{"title":"Usage","local":"usage","sections":[],"depth":3},{"title":"Reading results","local":"reading-results","sections":[{"title":"Saving tensors to disk","local":"saving-tensors-to-disk","sections":[],"depth":4}],"depth":3},{"title":"Comparing between implementations","local":"comparing-between-implementations","sections":[],"depth":3},{"title":"Limitations and scope","local":"transformers.model_addition_debugger_context","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="model-debugging-toolboxes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-debugging-toolboxes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model debugging toolboxes</span></h1> <p data-svelte-h="svelte-yqlsrj">This page lists all the debugging and model adding tools used by the library, as well as the utility functions it | |
| provides for it.</p> <p data-svelte-h="svelte-x293w1">Most of those are only useful if you are adding new models in the library.</p> <h2 class="relative group"><a id="model-addition-debuggers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-addition-debuggers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model addition debuggers</span></h2> <h3 class="relative group"><a id="model-addition-debugger---context-manager-for-model-adders" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-addition-debugger---context-manager-for-model-adders"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model addition debugger - context manager for model adders</span></h3> <p data-svelte-h="svelte-jmgycb">This context manager is a power user tool intended for model adders. It tracks all forward calls within a model forward | |
| and logs a slice of each input and output on a nested JSON. To note, this context manager enforces <code>torch.no_grad()</code>.</p> <h3 class="relative group"><a id="rationale" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#rationale"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Rationale</span></h3> <p data-svelte-h="svelte-15big6q">When porting models to transformers, even from python to python, model adders often have to do a lot of manual | |
| operations, involving saving and loading tensors, comparing dtypes, etc. This small tool can hopefully shave off some | |
| time.</p> <h3 class="relative group"><a id="usage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#usage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Usage</span></h3> <p data-svelte-h="svelte-jrktnj">Add this context manager as follows to debug a model:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">import</span> requests | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> LlavaProcessor, LlavaForConditionalGeneration | |
| <span class="hljs-keyword">from</span> transformers.model_debugging_utils <span class="hljs-keyword">import</span> model_addition_debugger_context | |
| torch.random.manual_seed(<span class="hljs-number">673</span>) | |
| <span class="hljs-comment"># load pretrained model and processor</span> | |
| model_id = <span class="hljs-string">"llava-hf/llava-1.5-7b-hf"</span> | |
| processor = LlavaProcessor.from_pretrained(model_id) | |
| model = LlavaForConditionalGeneration.from_pretrained(model_id) | |
| <span class="hljs-comment"># create random image input</span> | |
| random_image = Image.fromarray(torch.randint(<span class="hljs-number">0</span>, <span class="hljs-number">256</span>, (<span class="hljs-number">224</span>, <span class="hljs-number">224</span>, <span class="hljs-number">3</span>), dtype=torch.uint8).numpy()) | |
| <span class="hljs-comment"># prompt</span> | |
| prompt = <span class="hljs-string">"<image>Describe this image."</span> | |
| <span class="hljs-comment"># process inputs</span> | |
| inputs = processor(text=prompt, images=random_image, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-comment"># call forward method (not .generate!)</span> | |
| <span class="hljs-keyword">with</span> model_addition_debugger_context( | |
| model, | |
| debug_path=<span class="hljs-string">"optional_path_to_your_directory"</span>, | |
| do_prune_layers=<span class="hljs-literal">False</span> <span class="hljs-comment"># This will output ALL the layers of a model.</span> | |
| ): | |
| output = model.forward(**inputs) | |
| <!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="reading-results" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#reading-results"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Reading results</span></h3> <p data-svelte-h="svelte-1hx4vb">The debugger generates two files from the forward call, both with the same base name, but ending either with | |
| <code>_SUMMARY.json</code> or with <code>_FULL_TENSORS.json</code>.</p> <p data-svelte-h="svelte-a5x8y4">The first one will contain a summary of each module’s <em>input</em> and <em>output</em> tensor values and shapes.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"module_path"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"MolmoForConditionalGeneration"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"inputs"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"args"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"kwargs"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"input_ids"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"shape"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.Size([1, 589])"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"dtype"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.int64"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"attention_mask"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"shape"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.Size([1, 589])"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"dtype"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.int64"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"pixel_values"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"shape"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.Size([1, 5, 576, 588])"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"dtype"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.float32"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"mean"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(-8.9514e-01, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"std"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(9.2586e-01, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"min"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(-1.7923e+00, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"max"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(1.8899e+00, device='cuda:0')"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"children"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"module_path"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"MolmoForConditionalGeneration.language_model.model.embed_tokens"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"inputs"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"args"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"shape"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.Size([1, 589])"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"dtype"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.int64"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">]</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"outputs"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"shape"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.Size([1, 589, 3584])"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"dtype"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.float32"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"mean"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(6.5460e-06, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"std"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(2.3807e-02, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"min"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(-3.3398e-01, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"max"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(3.9453e-01, device='cuda:0')"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"module_path"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"MolmoForConditionalGeneration.vision_tower"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"inputs"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"args"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"shape"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.Size([5, 1, 576, 588])"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"dtype"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.float32"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"mean"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(-8.9514e-01, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"std"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(9.2586e-01, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"min"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(-1.7923e+00, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"max"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(1.8899e+00, device='cuda:0')"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"kwargs"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"output_hidden_states"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"True"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"children"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span> ... and so on<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cxxoo9">The <code>_FULL_TENSORS.json</code> file will display a full view of all tensors, which is useful for comparing two files.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> <span class="hljs-attr">"pixel_values"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"shape"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.Size([1, 5, 576, 588])"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"dtype"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"torch.float32"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"value"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-string">"tensor([[[[-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" ...,"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00]],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">""</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [[-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" ...,"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.4857e+00, -1.4820e+00, -1.2100e+00, ..., -6.0979e-01, -5.9650e-01, -3.8527e-01],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.6755e+00, -1.7221e+00, -1.4518e+00, ..., -7.5577e-01, -7.4658e-01, -5.5592e-01],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-7.9957e-01, -8.2162e-01, -5.7014e-01, ..., -1.3689e+00, -1.3169e+00, -1.0678e+00]],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">""</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [[-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" ...,"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-3.0322e-01, -5.0645e-01, -5.8436e-01, ..., -6.2439e-01, -7.9160e-01, -8.1188e-01],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-4.4921e-01, -6.5653e-01, -7.2656e-01, ..., -3.4702e-01, -5.2146e-01, -5.1326e-01],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-3.4702e-01, -5.3647e-01, -5.4170e-01, ..., -1.0915e+00, -1.1968e+00, -1.0252e+00]],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">""</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [[-1.1207e+00, -1.2718e+00, -1.0678e+00, ..., 1.2013e-01, -1.3126e-01, -1.7197e-01],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-6.9738e-01, -9.1166e-01, -8.5454e-01, ..., -5.5050e-02, -2.8134e-01, -4.2793e-01],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-3.4702e-01, -5.5148e-01, -5.8436e-01, ..., 1.9312e-01, -8.6235e-02, -2.1463e-01],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" ...,"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00]],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">""</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [[-1.0039e+00, -9.5669e-01, -6.5546e-01, ..., -1.4711e+00, -1.4219e+00, -1.1389e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.0039e+00, -9.5669e-01, -6.5546e-01, ..., -1.7193e+00, -1.6771e+00, -1.4091e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.6317e+00, -1.6020e+00, -1.2669e+00, ..., -1.2667e+00, -1.2268e+00, -8.9720e-01],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" ...,"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00],"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">" [-1.7923e+00, -1.7521e+00, -1.4802e+00, ..., -1.7923e+00, -1.7521e+00, -1.4802e+00]]]], device='cuda:0')"</span> | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"mean"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(-8.9514e-01, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"std"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(9.2586e-01, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"min"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(-1.7923e+00, device='cuda:0')"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"max"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tensor(1.8899e+00, device='cuda:0')"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span><!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="saving-tensors-to-disk" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#saving-tensors-to-disk"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Saving tensors to disk</span></h4> <p data-svelte-h="svelte-1e1mzyg">Some model adders may benefit from logging full tensor values to disk to support, for example, numerical analysis | |
| across implementations.</p> <p data-svelte-h="svelte-18fl08q">Set <code>use_repr=False</code> to write tensors to disk using <a href="https://huggingface.co/docs/safetensors/en/index" rel="nofollow">SafeTensors</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">with</span> model_addition_debugger_context( | |
| model, | |
| debug_path=<span class="hljs-string">"optional_path_to_your_directory"</span>, | |
| do_prune_layers=<span class="hljs-literal">False</span>, | |
| use_repr=<span class="hljs-literal">False</span>, <span class="hljs-comment"># Defaults to True</span> | |
| ): | |
| output = model.forward(**inputs)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qdpur6">When using <code>use_repr=False</code>, tensors are written to the same disk location as the <code>_SUMMARY.json</code> and | |
| <code>_FULL_TENSORS.json</code> files. The <code>value</code> property of entries in the <code>_FULL_TENSORS.json</code> file will contain a relative | |
| path reference to the associated <code>.safetensors</code> file. Each tensor is written to its own file as the <code>data</code> property of | |
| the state dictionary. File names are constructed using the <code>module_path</code> as a prefix with a few possible postfixes that | |
| are built recursively.</p> <ul data-svelte-h="svelte-866rdu"><li>Module inputs are denoted with the <code>_inputs</code> and outputs by <code>_outputs</code>.</li> <li><code>list</code> and <code>tuple</code> instances, such as <code>args</code> or function return values, will be postfixed with <code>_{index}</code>.</li> <li><code>dict</code> instances will be postfixed with <code>_{key}</code>.</li></ul> <h3 class="relative group"><a id="comparing-between-implementations" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#comparing-between-implementations"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Comparing between implementations</span></h3> <p data-svelte-h="svelte-co7nq5">Once the forward passes of two models have been traced by the debugger, one can compare the <code>json</code> output files. See | |
| below: we can see slight differences between these two implementations’ key projection layer. Inputs are mostly | |
| identical, but not quite. Looking through the file differences makes it easier to pinpoint which layer is wrong.</p> <p data-svelte-h="svelte-9wdzxi"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/files_difference_debugging.png" alt="download-icon"></p> <h3 class="relative group"><a id="transformers.model_addition_debugger_context" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#transformers.model_addition_debugger_context"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Limitations and scope</span></h3> <p data-svelte-h="svelte-s3plvp">This feature will only work for torch-based models. Models relying heavily on external kernel calls may work, but trace will | |
| probably miss some things. Regardless, any python implementation that aims at mimicking another implementation can be | |
| traced once instead of reran N times with breakpoints.</p> <p data-svelte-h="svelte-obvwja">If you pass <code>do_prune_layers=False</code> to your model debugger, ALL the layers will be outputted to <code>json</code>. Else, only the | |
| first and last layer will be shown. This is useful when some layers (typically cross-attention) appear only after N | |
| layers.</p> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.model_addition_debugger_context"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>transformers.model_addition_debugger_context</span></h4><!-- HTML_TAG_END --> <a id="transformers.model_addition_debugger_context" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.model_addition_debugger_context"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/vr_33892/src/transformers/model_debugging_utils.py#L398" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">></span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model<span class="opacity-60"></span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">debug_path<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">do_prune_layers<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">use_repr<span class="opacity-60">: bool = True</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <h1 class="relative group"><a id="model-addition-debugger---context-manager-for-model-adders" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-addition-debugger---context-manager-for-model-adders"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model addition debugger - context manager for model adders</span></h1> <p data-svelte-h="svelte-1rya5y1">This context manager is a power user tool intended for model adders.</p> <p data-svelte-h="svelte-b3yne0">It tracks all forward calls within a model forward and logs a slice of each input and output on a nested JSON file. | |
| If <code>use_repr=True</code> (the default), the JSON file will record a <code>repr()</code>-ized version of the tensors as a list of | |
| strings. If <code>use_repr=False</code>, the full tensors will be stored in separate SafeTensors files and the JSON file will | |
| provide a relative path to that file.</p> <p data-svelte-h="svelte-3tab8i">To note, this context manager enforces <code>torch.no_grad()</code>.</p> <h2 class="relative group"><a id="usage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#usage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Usage</span></h2> <p data-svelte-h="svelte-chd0p3">add the context manager to a model to debug</p> <div class="relative group rounded-md"><a id="transformers.model_addition_debugger_context.example" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#transformers.model_addition_debugger_context.example"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> LlavaProcessor, LlavaForConditionalGeneration, model_addition_debugger_context | |
| torch.random.manual_seed(<span class="hljs-number">673</span>) | |
| <span class="hljs-comment"># load pretrained model and processor</span> | |
| model_id = <span class="hljs-string">"llava-hf/llava-1.5-7b-hf"</span> | |
| processor = LlavaProcessor.from_pretrained(model_id) | |
| model = LlavaForConditionalGeneration.from_pretrained(model_id) | |
| <span class="hljs-comment"># create random image input</span> | |
| random_image = Image.fromarray(torch.randint(<span class="hljs-number">0</span>, <span class="hljs-number">256</span>, (<span class="hljs-number">224</span>, <span class="hljs-number">224</span>, <span class="hljs-number">3</span>), dtype=torch.uint8).numpy()) | |
| <span class="hljs-comment"># prompt</span> | |
| prompt = <span class="hljs-string">"<image>Describe this image."</span> | |
| <span class="hljs-comment"># process inputs</span> | |
| inputs = processor(text=prompt, images=random_image, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-comment"># call forward method (not .generate!)</span> | |
| <span class="hljs-keyword">with</span> model_addition_debugger_context(model, debug_path=<span class="hljs-string">"Your_debug_path"</span>, do_prune_layers=<span class="hljs-literal">False</span>): | |
| output = model.forward(**inputs)<!-- HTML_TAG_END --></pre></div></div></div> <h2 class="relative group"><a id="analyzer-of-skipped-tests" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#analyzer-of-skipped-tests"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Analyzer of skipped tests</span></h2> <h3 class="relative group"><a id="scan-skipped-tests---for-model-adders-and-maintainers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#scan-skipped-tests---for-model-adders-and-maintainers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Scan skipped tests - for model adders and maintainers</span></h3> <p data-svelte-h="svelte-x67jvg">This small util is a power user tool intended for model adders and maintainers. It lists all test methods | |
| existing in <code>test_modeling_common.py</code>, inherited by all model tester classes, and scans the repository to measure | |
| how many tests are being skipped and for which models.</p> <h3 class="relative group"><a id="rationale" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#rationale"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Rationale</span></h3> <p data-svelte-h="svelte-t3wc4y">When porting models to transformers, tests fail as they should, and sometimes <code>test_modeling_common</code> feels irreconcilable with the peculiarities of our brand new model. But how can we be sure we’re not breaking everything by adding a seemingly innocent skip?</p> <p data-svelte-h="svelte-fh8zb2">This utility:</p> <ul data-svelte-h="svelte-14flar2"><li>scans all test_modeling_common methods</li> <li>looks for times where a method is skipped</li> <li>returns a summary json you can load as a DataFrame/inspect</li></ul> <p data-svelte-h="svelte-fqk902"><strong>For instance test_inputs_embeds is skipped in a whooping 39% proportion at the time of writing this util.</strong></p> <p data-svelte-h="svelte-18ei40l"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/f7f671f69b88ce4967e19179172c248958d35742/transformers/tests_skipped_visualisation.png" alt="download-icon"></p> <h3 class="relative group"><a id="usage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#usage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Usage</span></h3> <p data-svelte-h="svelte-r4bkk4">You can run the skipped test analyzer in two ways:</p> <h4 class="relative group"><a id="full-scan-default" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#full-scan-default"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Full scan (default)</span></h4> <p data-svelte-h="svelte-15vjksv">From the root of <code>transformers</code> repo, scans all common test methods and outputs the results to a JSON file (default: <code>all_tests_scan_result.json</code>).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python utils/scan_skipped_tests.py --output_dir path/to/output<!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-1lhrjm3"><li><code>--output_dir</code> (optional): Directory where the JSON results will be saved. Defaults to the current directory.</li></ul> <p data-svelte-h="svelte-5gri02"><strong>Example output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->🔬 Parsing 331 model test files once each... | |
| 📝 Aggregating 224 tests... | |
| (224/224) test_update_candidate_strategy_with_matches_1es_3d_is_nonecodet_schedule_fa_kwargs | |
| ✅ Scan complete. | |
| 📄 JSON saved to /home/pablo/git/transformers/all_tests_scan_result.json | |
| <!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ihq8c8">And it will generate <code>all_tests_scan_result.json</code> file that you can inspect. The JSON is indexed by method name, and each entry follows this schema, indicating the origin as well (from <code>common</code>or <code>GenerationMixin</code>.)</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"<method_name>"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"origin"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"<test suite>"</span> | |
| <span class="hljs-attr">"models_ran"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">"<model_name>"</span><span class="hljs-punctuation">,</span> ...<span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"models_skipped"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">"<model_name>"</span><span class="hljs-punctuation">,</span> ...<span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"skipped_proportion"</span><span class="hljs-punctuation">:</span> <float><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"reasons_skipped"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">"<model_name>: <reason>"</span><span class="hljs-punctuation">,</span> | |
| ... | |
| <span class="hljs-punctuation">]</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| ... | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ob2wvw">Which you can visualise as above with e.g. <code>pandas</code></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->df = pd.read_json(<span class="hljs-string">'all_tests_scan_result.json'</span>).T | |
| df.sort_values(by=[<span class="hljs-string">'skipped_proportion'</span>], ascending=<span class="hljs-literal">False</span>) | |
| <!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="scan-a-single-test-method" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#scan-a-single-test-method"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Scan a single test method</span></h3> <p data-svelte-h="svelte-1tts0fd">You can focus on a specific test method using <code>--test_method_name</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python utils/scan_skipped_tests.py --test_method_name test_inputs_embeds --output_dir path/to/output<!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-cs9cqj"><li><code>--test_method_name</code>: Name of the test method to scan (e.g., <code>test_inputs_embeds</code>).</li> <li><code>--output_dir</code> (optional): Directory where the JSON result will be saved.</li></ul> <p data-svelte-h="svelte-5gri02"><strong>Example output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->$ python utils/scan_skipped_tests.py --test_method_name test_inputs_embeds | |
| 🔬 Parsing 331 model <span class="hljs-built_in">test</span> files once each... | |
| == test_inputs_embeds == | |
| Ran : 199/323 | |
| Skipped : 124/323 (38.4%) | |
| - aimv2: Aimv2 does not use inputs_embeds | |
| - align: Inputs_embeds is tested <span class="hljs-keyword">in</span> individual model tests | |
| - altclip: Inputs_embeds is tested <span class="hljs-keyword">in</span> individual model tests | |
| - audio_spectrogram_transformer: AST does not use inputs_embeds | |
| - beit: BEiT does not use inputs_embeds | |
| - bit: Bit does not use inputs_embeds | |
| - blip: Blip does not use inputs_embeds | |
| - blip_2: Inputs_embeds is tested <span class="hljs-keyword">in</span> individual model tests | |
| - bridgetower: | |
| - canine: CANINE does not have a get_input_embeddings() method. | |
| - ... | |
| 📄 JSON saved to /home/pablo/git/transformers/scan_test_inputs_embeds.json | |
| <!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="modular-model-detector" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#modular-model-detector"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Modular model detector</span></h2> <h3 class="relative group"><a id="code-similarity-analyzer---for-model-adders" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#code-similarity-analyzer---for-model-adders"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Code similarity analyzer - for model adders</span></h3> <p data-svelte-h="svelte-1bknayl">This utility analyzes code similarities between model implementations to identify opportunities for modularization. It compares a new or existing modeling file against all models in the library using embedding-based and token-based similarity metrics.</p> <h3 class="relative group"><a id="rationale" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#rationale"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Rationale</span></h3> <p data-svelte-h="svelte-umq6n3">When adding a new model to transformers, many components (attention layers, MLPs, outputs, etc.) may already exist in similar form in other models. Instead of implementing everything from scratch, model adders can identify which existing classes are similar and potentially reusable through modularization.</p> <p data-svelte-h="svelte-cfy7t">The tool computes two similarity scores:</p> <ul data-svelte-h="svelte-171fio5"><li><strong>Embedding score</strong>: Uses semantic code embeddings (via <code>Qwen/Qwen3-Embedding-4B</code>) to detect functionally similar code even with different naming</li> <li><strong>Jaccard score</strong>: Measures token set overlap to identify structurally similar code patterns</li></ul> <p data-svelte-h="svelte-12ehpme">A score of 1.00 means the code is identical.</p> <h3 class="relative group"><a id="usage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#usage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Usage</span></h3> <p data-svelte-h="svelte-3oz7pq">From the root of the <code>transformers</code> repository:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python utils/modular_model_detector.py --modeling-file path/to/modeling_file.py<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12idtgh">The tool will automatically download the pre-built index from the Hub (requires RAM/VRAM for the embedding model).</p> <p data-svelte-h="svelte-5gri02"><strong>Example output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Loading checkpoint shards: 100%|████████████████████| 2/2 [00:00<00:00, 33.62it/s] | |
| encoding 21 query definitions with Qwen/Qwen3-Embedding-4B (device=cuda, batch=16, max_length=4096) | |
| stuff.py::Beit3ImageTextMatchingOutput: | |
| embedding: | |
| blip_2::Blip2ImageTextMatchingModelOutput (0.9994) | |
| chinese_clip::ChineseCLIPOutput (0.9818) | |
| owlvit::OwlViTOutput (0.9818) | |
| jaccard: | |
| owlv2::Owlv2Output (0.9667) | |
| metaclip_2::MetaClip2Output (0.9667) | |
| altclip::AltCLIPOutput (0.9667) | |
| intersection: | |
| blip::BlipOutput | |
| owlvit::OwlViTOutput | |
| stuff.py::Beit3MLP: | |
| embedding: | |
| efficientloftr::EfficientLoFTRMLP (0.9718) | |
| seggpt::SegGptMlp (0.9650) | |
| jaccard: | |
| chinese_clip::ChineseCLIPTextSelfOutput (0.5294) | |
| bert::BertSelfOutput (0.5294) | |
| intersection:<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-chl865">The <code>intersection</code> field shows classes that appear in both top-5 results, indicating high confidence for modularization candidates.</p> <h3 class="relative group"><a id="building-a-custom-index" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#building-a-custom-index"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Building a custom index</span></h3> <p data-svelte-h="svelte-1okqu5g">To rebuild the index from your local codebase (useful after adding new models or using a different embedding model):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python utils/modular_model_detector.py --build<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-fkg9lu">To push the rebuilt index to a Hub dataset:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python utils/modular_model_detector.py --build --push-new-index --hub-dataset your-org/your-dataset<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Options</span></h3> <ul data-svelte-h="svelte-f1qsum"><li><code>--modeling-file</code>: Path to the modeling file to analyze</li> <li><code>--build</code>: Build the code similarity index from all modeling files in <code>src/transformers/models/</code></li> <li><code>--push-new-index</code>: After building, push the index to a Hub dataset (requires <code>--build</code>)</li> <li><code>--hub-dataset</code>: Hub dataset repository ID to pull/push the index (default: <code>hf-internal-testing/transformers_code_embeddings</code>)</li></ul> <h3 class="relative group"><a id="limitations" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#limitations"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Limitations</span></h3> <p data-svelte-h="svelte-1iqccdv">This tool requires GPU/CPU resources to run the embedding model (<code>Qwen/Qwen3-Embedding-4B</code>). The pre-built index is downloaded from the Hub by default, which requires an internet connection on first use.</p> <p data-svelte-h="svelte-1y7y90y">Results are suggestions based on code similarity and should be manually reviewed before modularization. High similarity scores don’t guarantee perfect compatibility.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/en/internal/model_debugging_utils.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_16tnnm8 = { | |
| assets: "/docs/transformers/pr_33892/en", | |
| base: "/docs/transformers/pr_33892/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/transformers/pr_33892/en/_app/immutable/entry/start.b2c4257a.js"), | |
| import("/docs/transformers/pr_33892/en/_app/immutable/entry/app.05ef1f97.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 40], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 93.4 kB
- Xet hash:
- 6807226576c0e5166f8d5013762915af2b2ff6986200c2eb922d2935a20e9d08
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.